From 9c3c16143d3872a6326e3ff23ffceed2cbd69134 Mon Sep 17 00:00:00 2001 From: Mathieu Poliquin Date: Mon, 24 Jul 2023 22:20:43 +0800 Subject: [PATCH] Sega Saturn emulator + Virtua Fighter 2 integration Doesn't include unit tests because the Saturn emulator requires a BIOS file that can't be commited to a public repo (same issue as for roms) --- CMakeLists.txt | 1 + cores/saturn.json | 15 + cores/saturn/.gitignore | 7 + cores/saturn/.gitlab-ci.yml | 148 + cores/saturn/.travis.yml | 32 + cores/saturn/COPYING | 341 + cores/saturn/Makefile | 495 + cores/saturn/Makefile.common | 194 + cores/saturn/README.md | 47 + cores/saturn/deps/crypto/README.md | 17 + cores/saturn/deps/crypto/aes.c | 1095 + cores/saturn/deps/crypto/aes.h | 123 + cores/saturn/deps/crypto/arcfour.c | 47 + cores/saturn/deps/crypto/arcfour.h | 30 + cores/saturn/deps/crypto/base64.c | 135 + cores/saturn/deps/crypto/base64.h | 27 + cores/saturn/deps/crypto/blowfish.c | 269 + cores/saturn/deps/crypto/blowfish.h | 32 + cores/saturn/deps/crypto/crypto_types.h | 15 + cores/saturn/deps/crypto/des.c | 269 + cores/saturn/deps/crypto/des.h | 37 + cores/saturn/deps/crypto/md2.c | 104 + cores/saturn/deps/crypto/md2.h | 33 + cores/saturn/deps/crypto/md5.c | 189 + cores/saturn/deps/crypto/md5.h | 33 + cores/saturn/deps/crypto/rot-13.c | 35 + cores/saturn/deps/crypto/rot-13.h | 20 + cores/saturn/deps/crypto/sha1.c | 149 + cores/saturn/deps/crypto/sha1.h | 34 + cores/saturn/deps/crypto/sha256.c | 158 + cores/saturn/deps/crypto/sha256.h | 34 + cores/saturn/deps/libchdr/CMakeLists.txt | 123 + cores/saturn/deps/libchdr/LICENSE.txt | 24 + cores/saturn/deps/libchdr/README.md | 7 + .../deps/libchdr/include/dr_libs/dr_flac.h | 10739 +++ .../deps/libchdr/include/libchdr/bitstream.h | 43 + .../deps/libchdr/include/libchdr/cdrom.h | 110 + .../saturn/deps/libchdr/include/libchdr/chd.h | 425 + .../deps/libchdr/include/libchdr/chdconfig.h | 10 + .../deps/libchdr/include/libchdr/coretypes.h | 48 + .../deps/libchdr/include/libchdr/flac.h | 50 + .../deps/libchdr/include/libchdr/huffman.h | 90 + .../deps/libchdr/src/libchdr_bitstream.c | 125 + cores/saturn/deps/libchdr/src/libchdr_cdrom.c | 415 + cores/saturn/deps/libchdr/src/libchdr_chd.c | 2672 + cores/saturn/deps/libchdr/src/libchdr_flac.c | 302 + .../saturn/deps/libchdr/src/libchdr_huffman.c | 544 + cores/saturn/deps/libchdr/src/link.T | 5 + cores/saturn/deps/lzma/CMakeLists.txt | 32 + cores/saturn/deps/lzma/LICENSE | 3 + cores/saturn/deps/lzma/include/7zTypes.h | 375 + cores/saturn/deps/lzma/include/Alloc.h | 51 + cores/saturn/deps/lzma/include/Bra.h | 64 + cores/saturn/deps/lzma/include/Compiler.h | 33 + cores/saturn/deps/lzma/include/CpuArch.h | 336 + cores/saturn/deps/lzma/include/Delta.h | 19 + cores/saturn/deps/lzma/include/LzFind.h | 121 + cores/saturn/deps/lzma/include/LzHash.h | 57 + cores/saturn/deps/lzma/include/Lzma86.h | 111 + cores/saturn/deps/lzma/include/LzmaDec.h | 234 + cores/saturn/deps/lzma/include/LzmaEnc.h | 76 + cores/saturn/deps/lzma/include/LzmaLib.h | 131 + cores/saturn/deps/lzma/include/Precomp.h | 10 + cores/saturn/deps/lzma/include/Sort.h | 18 + cores/saturn/deps/lzma/lzma-history.txt | 446 + cores/saturn/deps/lzma/lzma.txt | 328 + cores/saturn/deps/lzma/lzma.vcxproj | 543 + cores/saturn/deps/lzma/lzma.vcxproj.filters | 17 + cores/saturn/deps/lzma/src/Alloc.c | 307 + cores/saturn/deps/lzma/src/Bra86.c | 82 + cores/saturn/deps/lzma/src/BraIA64.c | 53 + cores/saturn/deps/lzma/src/CpuArch.c | 218 + cores/saturn/deps/lzma/src/Delta.c | 64 + cores/saturn/deps/lzma/src/LzFind.c | 1127 + cores/saturn/deps/lzma/src/Lzma86Dec.c | 54 + cores/saturn/deps/lzma/src/Lzma86Enc.c | 106 + cores/saturn/deps/lzma/src/LzmaDec.c | 1185 + cores/saturn/deps/lzma/src/LzmaEnc.c | 1330 + cores/saturn/deps/zlib/README | 115 + cores/saturn/deps/zlib/adler32.c | 102 + cores/saturn/deps/zlib/crc32.c | 46 + cores/saturn/deps/zlib/crc32.h | 61 + cores/saturn/deps/zlib/inffast.c | 264 + cores/saturn/deps/zlib/inffast.h | 11 + cores/saturn/deps/zlib/inflate.c | 1050 + cores/saturn/deps/zlib/inflate.h | 124 + cores/saturn/deps/zlib/inftrees.c | 304 + cores/saturn/deps/zlib/inftrees.h | 62 + cores/saturn/deps/zlib/zconf.h | 274 + cores/saturn/deps/zlib/zlib.h | 1065 + cores/saturn/deps/zlib/zutil.h | 91 + cores/saturn/disc.cpp | 827 + cores/saturn/disc.h | 27 + cores/saturn/input.cpp | 1748 + cores/saturn/input.h | 30 + cores/saturn/jni/Android.mk | 37 + cores/saturn/jni/Application.mk | 2 + cores/saturn/libretro-common/cdrom/cdrom.c | 1745 + .../compat/compat_posix_string.c | 104 + .../compat/compat_strcasestr.c | 58 + .../libretro-common/compat/compat_strl.c | 62 + .../libretro-common/compat/fopen_utf8.c | 64 + .../libretro-common/encodings/encoding_utf.c | 532 + cores/saturn/libretro-common/file/file_path.c | 1426 + .../libretro-common/file/retro_dirent.c | 122 + .../saturn/libretro-common/include/boolean.h | 39 + .../libretro-common/include/cdrom/cdrom.h | 125 + .../include/compat/apple_compat.h | 84 + .../libretro-common/include/compat/fnmatch.h | 30 + .../include/compat/fopen_utf8.h | 34 + .../libretro-common/include/compat/getopt.h | 74 + .../libretro-common/include/compat/ifaddrs.h | 53 + .../include/compat/intrinsics.h | 99 + .../libretro-common/include/compat/msvc.h | 126 + .../include/compat/msvc/stdint.h | 255 + .../include/compat/posix_string.h | 60 + .../include/compat/strcasestr.h | 48 + .../libretro-common/include/compat/strl.h | 59 + .../libretro-common/include/encodings/utf.h | 137 + .../libretro-common/include/file/file_path.h | 679 + .../saturn/libretro-common/include/libretro.h | 3964 + .../libretro-common/include/lists/dir_list.h | 100 + .../include/lists/string_list.h | 205 + .../saturn/libretro-common/include/memalign.h | 40 + cores/saturn/libretro-common/include/memmap.h | 52 + .../libretro-common/include/retro_assert.h | 35 + .../libretro-common/include/retro_common.h | 36 + .../include/retro_common_api.h | 119 + .../libretro-common/include/retro_dirent.h | 77 + .../include/retro_endianness.h | 580 + .../include/retro_environment.h | 114 + .../libretro-common/include/retro_inline.h | 39 + .../libretro-common/include/retro_math.h | 190 + .../include/retro_miscellaneous.h | 218 + .../libretro-common/include/retro_timers.h | 112 + .../include/rthreads/async_job.h | 35 + .../include/rthreads/rsemaphore.h | 54 + .../include/rthreads/rthreads.h | 269 + .../include/streams/file_stream.h | 142 + .../include/streams/file_stream_transforms.h | 101 + .../include/string/stdstring.h | 387 + .../libretro-common/include/time/rtime.h | 48 + .../saturn/libretro-common/include/vfs/vfs.h | 111 + .../include/vfs/vfs_implementation.h | 82 + .../include/vfs/vfs_implementation_cdrom.h | 52 + cores/saturn/libretro-common/lists/dir_list.c | 280 + .../libretro-common/lists/string_list.c | 539 + .../saturn/libretro-common/memmap/memalign.c | 63 + .../libretro-common/rthreads/async_job.c | 149 + .../libretro-common/rthreads/gx_pthread.h | 186 + .../libretro-common/rthreads/psp_pthread.h | 305 + .../libretro-common/rthreads/rsemaphore.c | 116 + .../libretro-common/rthreads/rthreads.c | 933 + .../rthreads/xenon_sdl_threads.c | 58 + .../libretro-common/streams/file_stream.c | 659 + .../streams/file_stream_transforms.c | 195 + .../saturn/libretro-common/string/stdstring.c | 728 + cores/saturn/libretro-common/time/rtime.c | 78 + .../libretro-common/vfs/vfs_implementation.c | 1246 + .../libretro-common/vfs/vfs_implementation.d | 8 + .../vfs/vfs_implementation_cdrom.c | 495 + .../vfs/vfs_implementation_uwp.cpp | 808 + cores/saturn/libretro.cpp | 1044 + cores/saturn/libretro_core_options.h | 1017 + cores/saturn/libretro_settings.cpp | 17 + cores/saturn/libretro_settings.h | 35 + cores/saturn/link.T | 5 + cores/saturn/mednafen/FileStream.cpp | 104 + cores/saturn/mednafen/FileStream.h | 48 + cores/saturn/mednafen/MemoryStream.cpp | 191 + cores/saturn/mednafen/MemoryStream.h | 64 + cores/saturn/mednafen/Stream.cpp | 48 + cores/saturn/mednafen/Stream.h | 148 + cores/saturn/mednafen/cdrom/CDAFReader.cpp | 53 + cores/saturn/mednafen/cdrom/CDAFReader.h | 61 + .../mednafen/cdrom/CDAFReader_Vorbis.cpp | 128 + .../saturn/mednafen/cdrom/CDAFReader_Vorbis.h | 28 + cores/saturn/mednafen/cdrom/CDAccess.cpp | 58 + cores/saturn/mednafen/cdrom/CDAccess.h | 32 + cores/saturn/mednafen/cdrom/CDAccess_CCD.cpp | 412 + cores/saturn/mednafen/cdrom/CDAccess_CCD.h | 52 + cores/saturn/mednafen/cdrom/CDAccess_CHD.cpp | 471 + cores/saturn/mednafen/cdrom/CDAccess_CHD.h | 94 + .../saturn/mednafen/cdrom/CDAccess_Image.cpp | 1249 + cores/saturn/mednafen/cdrom/CDAccess_Image.h | 94 + cores/saturn/mednafen/cdrom/CDUtility.cpp | 423 + cores/saturn/mednafen/cdrom/CDUtility.h | 233 + cores/saturn/mednafen/cdrom/cdromif.cpp | 577 + cores/saturn/mednafen/cdrom/cdromif.h | 59 + cores/saturn/mednafen/cdrom/dvdisaster.h | 149 + cores/saturn/mednafen/cdrom/edc_crc32.cpp | 130 + cores/saturn/mednafen/cdrom/galois-inlines.h | 40 + cores/saturn/mednafen/cdrom/galois.cpp | 153 + cores/saturn/mednafen/cdrom/l-ec.cpp | 408 + cores/saturn/mednafen/cdrom/lec.cpp | 595 + cores/saturn/mednafen/cdrom/lec.h | 64 + cores/saturn/mednafen/cdrom/recover-raw.cpp | 202 + cores/saturn/mednafen/error.cpp | 51 + cores/saturn/mednafen/error.h | 28 + cores/saturn/mednafen/general.cpp | 231 + cores/saturn/mednafen/general.h | 39 + cores/saturn/mednafen/git.h | 147 + cores/saturn/mednafen/hash/md5.cpp | 260 + cores/saturn/mednafen/hash/md5.h | 41 + cores/saturn/mednafen/hash/sha256.cpp | 170 + cores/saturn/mednafen/hash/sha256.h | 57 + cores/saturn/mednafen/hw_cpu/m68k/m68k.cpp | 2270 + cores/saturn/mednafen/hw_cpu/m68k/m68k.h | 476 + .../mednafen/hw_cpu/m68k/m68k_instr.inc | 61700 ++++++++++++++++ cores/saturn/mednafen/jump.h | 76 + cores/saturn/mednafen/masmem.h | 209 + cores/saturn/mednafen/math_ops.h | 272 + cores/saturn/mednafen/mednafen-driver.h | 22 + cores/saturn/mednafen/mednafen-endian.c | 167 + cores/saturn/mednafen/mednafen-endian.h | 383 + cores/saturn/mednafen/mednafen-types.h | 116 + cores/saturn/mednafen/mednafen.h | 34 + cores/saturn/mednafen/mempatcher-driver.h | 47 + cores/saturn/mednafen/mempatcher.cpp | 672 + cores/saturn/mednafen/mempatcher.h | 30 + cores/saturn/mednafen/settings-common.h | 57 + cores/saturn/mednafen/settings-driver.h | 8 + cores/saturn/mednafen/settings.cpp | 71 + cores/saturn/mednafen/settings.h | 14 + cores/saturn/mednafen/ss/cart.cpp | 209 + cores/saturn/mednafen/ss/cart.h | 106 + cores/saturn/mednafen/ss/cart/ar4mp.cpp | 160 + cores/saturn/mednafen/ss/cart/ar4mp.h | 29 + cores/saturn/mednafen/ss/cart/backup.cpp | 102 + cores/saturn/mednafen/ss/cart/backup.h | 27 + cores/saturn/mednafen/ss/cart/common.h | 5 + cores/saturn/mednafen/ss/cart/cs1ram.cpp | 78 + cores/saturn/mednafen/ss/cart/cs1ram.h | 27 + cores/saturn/mednafen/ss/cart/debug.cpp | 55 + cores/saturn/mednafen/ss/cart/debug.h | 27 + cores/saturn/mednafen/ss/cart/extram.cpp | 92 + cores/saturn/mednafen/ss/cart/extram.h | 27 + cores/saturn/mednafen/ss/cart/rom.cpp | 44 + cores/saturn/mednafen/ss/cart/rom.h | 29 + cores/saturn/mednafen/ss/cdb.cpp | 4218 ++ cores/saturn/mednafen/ss/cdb.h | 69 + cores/saturn/mednafen/ss/db.cpp | 683 + cores/saturn/mednafen/ss/db.h | 39 + cores/saturn/mednafen/ss/input/3dpad.cpp | 165 + cores/saturn/mednafen/ss/input/3dpad.h | 50 + cores/saturn/mednafen/ss/input/common.h | 4 + cores/saturn/mednafen/ss/input/gamepad.cpp | 69 + cores/saturn/mednafen/ss/input/gamepad.h | 41 + cores/saturn/mednafen/ss/input/gun.cpp | 307 + cores/saturn/mednafen/ss/input/gun.h | 60 + cores/saturn/mednafen/ss/input/jpkeyboard.cpp | 263 + cores/saturn/mednafen/ss/input/jpkeyboard.h | 69 + cores/saturn/mednafen/ss/input/keyboard.cpp | 277 + cores/saturn/mednafen/ss/input/keyboard.h | 68 + cores/saturn/mednafen/ss/input/mission.cpp | 190 + cores/saturn/mednafen/ss/input/mission.h | 54 + cores/saturn/mednafen/ss/input/mouse.cpp | 155 + cores/saturn/mednafen/ss/input/mouse.h | 48 + cores/saturn/mednafen/ss/input/multitap.cpp | 270 + cores/saturn/mednafen/ss/input/multitap.h | 60 + cores/saturn/mednafen/ss/input/wheel.cpp | 135 + cores/saturn/mednafen/ss/input/wheel.h | 47 + .../ss/notes/HOST-CPU-INTENSIVE-GAMES | 7 + .../mednafen/ss/notes/PROBLEMATIC-GAMES | 215 + cores/saturn/mednafen/ss/notes/REFERENCES | 8 + cores/saturn/mednafen/ss/notes/SCSP-FM-GAMES | 90 + .../saturn/mednafen/ss/notes/WIDESCREEN-GAMES | 10 + .../ss/notes/build_sh7095s_ctable.cpp | 15 + cores/saturn/mednafen/ss/notes/games.txt | 84 + cores/saturn/mednafen/ss/notes/gen_dsp.cpp | 106 + cores/saturn/mednafen/ss/scsp.h | 304 + cores/saturn/mednafen/ss/scsp.inc | 1869 + cores/saturn/mednafen/ss/scu.h | 104 + cores/saturn/mednafen/ss/scu.inc | 2401 + cores/saturn/mednafen/ss/scu_aciv1tab.inc | 112 + cores/saturn/mednafen/ss/scu_actab.inc | 479 + cores/saturn/mednafen/ss/scu_btab.inc | 542 + cores/saturn/mednafen/ss/scu_dsp_common.inc | 238 + cores/saturn/mednafen/ss/scu_dsp_dmatab.inc | 52 + cores/saturn/mednafen/ss/scu_dsp_gen.cpp | 341 + cores/saturn/mednafen/ss/scu_dsp_gentab.inc | 8772 +++ cores/saturn/mednafen/ss/scu_dsp_jmp.cpp | 42 + cores/saturn/mednafen/ss/scu_dsp_jmptab.inc | 4 + cores/saturn/mednafen/ss/scu_dsp_misc.cpp | 68 + cores/saturn/mednafen/ss/scu_dsp_misctab.inc | 2 + cores/saturn/mednafen/ss/scu_dsp_mvi.cpp | 84 + cores/saturn/mednafen/ss/scu_dsp_mvitab.inc | 100 + cores/saturn/mednafen/ss/sh7095.h | 613 + cores/saturn/mednafen/ss/sh7095.inc | 4330 ++ .../saturn/mednafen/ss/sh7095_idecodetab.inc | 256 + cores/saturn/mednafen/ss/sh7095_opdefs.inc | 157 + cores/saturn/mednafen/ss/sh7095_ops.inc | 2029 + cores/saturn/mednafen/ss/sh7095s_cnorm.inc | 10000 +++ cores/saturn/mednafen/ss/sh7095s_cnorm_dm.inc | 20000 +++++ cores/saturn/mednafen/ss/sh7095s_ctable.inc | 1536 + .../saturn/mednafen/ss/sh7095s_ctable_dm.inc | 1536 + cores/saturn/mednafen/ss/sh7095s_rsu.inc | 36 + cores/saturn/mednafen/ss/smpc.cpp | 1508 + cores/saturn/mednafen/ss/smpc.h | 84 + cores/saturn/mednafen/ss/smpc_iodevice.h | 56 + cores/saturn/mednafen/ss/sound.cpp | 403 + cores/saturn/mednafen/ss/sound.h | 53 + cores/saturn/mednafen/ss/ss.cpp | 1394 + cores/saturn/mednafen/ss/ss.h | 157 + cores/saturn/mednafen/ss/ss_endian.h | 156 + cores/saturn/mednafen/ss/ss_memory.h | 106 + cores/saturn/mednafen/ss/vdp1.cpp | 1458 + cores/saturn/mednafen/ss/vdp1.h | 105 + cores/saturn/mednafen/ss/vdp1_common.h | 564 + cores/saturn/mednafen/ss/vdp1_line.cpp | 132 + cores/saturn/mednafen/ss/vdp1_poly.cpp | 179 + cores/saturn/mednafen/ss/vdp1_sprite.cpp | 340 + cores/saturn/mednafen/ss/vdp2.cpp | 1362 + cores/saturn/mednafen/ss/vdp2.h | 145 + cores/saturn/mednafen/ss/vdp2_common.h | 30 + cores/saturn/mednafen/ss/vdp2_render.cpp | 3484 + cores/saturn/mednafen/ss/vdp2_render.h | 60 + cores/saturn/mednafen/state.cpp | 424 + cores/saturn/mednafen/state.h | 192 + cores/saturn/mednafen/tremor/COPYING | 28 + cores/saturn/mednafen/tremor/README | 46 + cores/saturn/mednafen/tremor/asm_arm.h | 245 + cores/saturn/mednafen/tremor/backends.h | 131 + cores/saturn/mednafen/tremor/bitwise.c | 522 + cores/saturn/mednafen/tremor/block.c | 503 + cores/saturn/mednafen/tremor/block.h | 24 + cores/saturn/mednafen/tremor/codebook.c | 391 + cores/saturn/mednafen/tremor/codebook.h | 101 + cores/saturn/mednafen/tremor/codec_internal.h | 92 + cores/saturn/mednafen/tremor/floor0.c | 439 + cores/saturn/mednafen/tremor/floor1.c | 460 + cores/saturn/mednafen/tremor/framing.c | 1006 + cores/saturn/mednafen/tremor/info.c | 346 + cores/saturn/mednafen/tremor/ivorbiscodec.h | 199 + cores/saturn/mednafen/tremor/ivorbisfile.h | 109 + cores/saturn/mednafen/tremor/lsp_lookup.h | 136 + cores/saturn/mednafen/tremor/mapping0.c | 327 + cores/saturn/mednafen/tremor/mdct.c | 510 + cores/saturn/mednafen/tremor/mdct.h | 52 + cores/saturn/mednafen/tremor/mdct_lookup.h | 540 + cores/saturn/mednafen/tremor/misc.h | 252 + cores/saturn/mednafen/tremor/ogg.h | 210 + cores/saturn/mednafen/tremor/os.h | 44 + cores/saturn/mednafen/tremor/registry.c | 50 + cores/saturn/mednafen/tremor/registry.h | 40 + cores/saturn/mednafen/tremor/res012.c | 375 + cores/saturn/mednafen/tremor/sharedbook.c | 459 + cores/saturn/mednafen/tremor/synthesis.c | 130 + cores/saturn/mednafen/tremor/vorbisfile.c | 1635 + cores/saturn/mednafen/tremor/window.c | 83 + cores/saturn/mednafen/tremor/window.h | 27 + cores/saturn/mednafen/tremor/window_lookup.h | 2081 + cores/saturn/mednafen/video/Deinterlacer.cpp | 184 + cores/saturn/mednafen/video/Deinterlacer.h | 59 + cores/saturn/mednafen/video/surface.cpp | 94 + cores/saturn/mednafen/video/surface.h | 118 + retro/data/.gitignore | 2 + .../VirtuaFighter2-Saturn/AkiraVsLau.state | Bin 0 -> 1911087 bytes .../stable/VirtuaFighter2-Saturn/data.json | 16 + .../VirtuaFighter2-Saturn/metadata.json | 3 + .../data/stable/VirtuaFighter2-Saturn/rom.sha | 1 + .../VirtuaFighter2-Saturn/scenario.json | 32 + setup.py | 1 + 363 files changed, 219220 insertions(+) create mode 100644 cores/saturn.json create mode 100644 cores/saturn/.gitignore create mode 100644 cores/saturn/.gitlab-ci.yml create mode 100644 cores/saturn/.travis.yml create mode 100644 cores/saturn/COPYING create mode 100644 cores/saturn/Makefile create mode 100644 cores/saturn/Makefile.common create mode 100644 cores/saturn/README.md create mode 100644 cores/saturn/deps/crypto/README.md create mode 100644 cores/saturn/deps/crypto/aes.c create mode 100644 cores/saturn/deps/crypto/aes.h create mode 100644 cores/saturn/deps/crypto/arcfour.c create mode 100644 cores/saturn/deps/crypto/arcfour.h create mode 100644 cores/saturn/deps/crypto/base64.c create mode 100644 cores/saturn/deps/crypto/base64.h create mode 100644 cores/saturn/deps/crypto/blowfish.c create mode 100644 cores/saturn/deps/crypto/blowfish.h create mode 100644 cores/saturn/deps/crypto/crypto_types.h create mode 100644 cores/saturn/deps/crypto/des.c create mode 100644 cores/saturn/deps/crypto/des.h create mode 100644 cores/saturn/deps/crypto/md2.c create mode 100644 cores/saturn/deps/crypto/md2.h create mode 100644 cores/saturn/deps/crypto/md5.c create mode 100644 cores/saturn/deps/crypto/md5.h create mode 100644 cores/saturn/deps/crypto/rot-13.c create mode 100644 cores/saturn/deps/crypto/rot-13.h create mode 100644 cores/saturn/deps/crypto/sha1.c create mode 100644 cores/saturn/deps/crypto/sha1.h create mode 100644 cores/saturn/deps/crypto/sha256.c create mode 100644 cores/saturn/deps/crypto/sha256.h create mode 100644 cores/saturn/deps/libchdr/CMakeLists.txt create mode 100644 cores/saturn/deps/libchdr/LICENSE.txt create mode 100644 cores/saturn/deps/libchdr/README.md create mode 100644 cores/saturn/deps/libchdr/include/dr_libs/dr_flac.h create mode 100644 cores/saturn/deps/libchdr/include/libchdr/bitstream.h create mode 100644 cores/saturn/deps/libchdr/include/libchdr/cdrom.h create mode 100644 cores/saturn/deps/libchdr/include/libchdr/chd.h create mode 100644 cores/saturn/deps/libchdr/include/libchdr/chdconfig.h create mode 100644 cores/saturn/deps/libchdr/include/libchdr/coretypes.h create mode 100644 cores/saturn/deps/libchdr/include/libchdr/flac.h create mode 100644 cores/saturn/deps/libchdr/include/libchdr/huffman.h create mode 100644 cores/saturn/deps/libchdr/src/libchdr_bitstream.c create mode 100644 cores/saturn/deps/libchdr/src/libchdr_cdrom.c create mode 100644 cores/saturn/deps/libchdr/src/libchdr_chd.c create mode 100644 cores/saturn/deps/libchdr/src/libchdr_flac.c create mode 100644 cores/saturn/deps/libchdr/src/libchdr_huffman.c create mode 100644 cores/saturn/deps/libchdr/src/link.T create mode 100644 cores/saturn/deps/lzma/CMakeLists.txt create mode 100644 cores/saturn/deps/lzma/LICENSE create mode 100644 cores/saturn/deps/lzma/include/7zTypes.h create mode 100644 cores/saturn/deps/lzma/include/Alloc.h create mode 100644 cores/saturn/deps/lzma/include/Bra.h create mode 100644 cores/saturn/deps/lzma/include/Compiler.h create mode 100644 cores/saturn/deps/lzma/include/CpuArch.h create mode 100644 cores/saturn/deps/lzma/include/Delta.h create mode 100644 cores/saturn/deps/lzma/include/LzFind.h create mode 100644 cores/saturn/deps/lzma/include/LzHash.h create mode 100644 cores/saturn/deps/lzma/include/Lzma86.h create mode 100644 cores/saturn/deps/lzma/include/LzmaDec.h create mode 100644 cores/saturn/deps/lzma/include/LzmaEnc.h create mode 100644 cores/saturn/deps/lzma/include/LzmaLib.h create mode 100644 cores/saturn/deps/lzma/include/Precomp.h create mode 100644 cores/saturn/deps/lzma/include/Sort.h create mode 100644 cores/saturn/deps/lzma/lzma-history.txt create mode 100644 cores/saturn/deps/lzma/lzma.txt create mode 100644 cores/saturn/deps/lzma/lzma.vcxproj create mode 100644 cores/saturn/deps/lzma/lzma.vcxproj.filters create mode 100644 cores/saturn/deps/lzma/src/Alloc.c create mode 100644 cores/saturn/deps/lzma/src/Bra86.c create mode 100644 cores/saturn/deps/lzma/src/BraIA64.c create mode 100644 cores/saturn/deps/lzma/src/CpuArch.c create mode 100644 cores/saturn/deps/lzma/src/Delta.c create mode 100644 cores/saturn/deps/lzma/src/LzFind.c create mode 100644 cores/saturn/deps/lzma/src/Lzma86Dec.c create mode 100644 cores/saturn/deps/lzma/src/Lzma86Enc.c create mode 100644 cores/saturn/deps/lzma/src/LzmaDec.c create mode 100644 cores/saturn/deps/lzma/src/LzmaEnc.c create mode 100644 cores/saturn/deps/zlib/README create mode 100644 cores/saturn/deps/zlib/adler32.c create mode 100644 cores/saturn/deps/zlib/crc32.c create mode 100644 cores/saturn/deps/zlib/crc32.h create mode 100644 cores/saturn/deps/zlib/inffast.c create mode 100644 cores/saturn/deps/zlib/inffast.h create mode 100644 cores/saturn/deps/zlib/inflate.c create mode 100644 cores/saturn/deps/zlib/inflate.h create mode 100644 cores/saturn/deps/zlib/inftrees.c create mode 100644 cores/saturn/deps/zlib/inftrees.h create mode 100644 cores/saturn/deps/zlib/zconf.h create mode 100644 cores/saturn/deps/zlib/zlib.h create mode 100644 cores/saturn/deps/zlib/zutil.h create mode 100644 cores/saturn/disc.cpp create mode 100644 cores/saturn/disc.h create mode 100644 cores/saturn/input.cpp create mode 100644 cores/saturn/input.h create mode 100644 cores/saturn/jni/Android.mk create mode 100644 cores/saturn/jni/Application.mk create mode 100644 cores/saturn/libretro-common/cdrom/cdrom.c create mode 100644 cores/saturn/libretro-common/compat/compat_posix_string.c create mode 100644 cores/saturn/libretro-common/compat/compat_strcasestr.c create mode 100644 cores/saturn/libretro-common/compat/compat_strl.c create mode 100644 cores/saturn/libretro-common/compat/fopen_utf8.c create mode 100644 cores/saturn/libretro-common/encodings/encoding_utf.c create mode 100644 cores/saturn/libretro-common/file/file_path.c create mode 100644 cores/saturn/libretro-common/file/retro_dirent.c create mode 100644 cores/saturn/libretro-common/include/boolean.h create mode 100644 cores/saturn/libretro-common/include/cdrom/cdrom.h create mode 100644 cores/saturn/libretro-common/include/compat/apple_compat.h create mode 100644 cores/saturn/libretro-common/include/compat/fnmatch.h create mode 100644 cores/saturn/libretro-common/include/compat/fopen_utf8.h create mode 100644 cores/saturn/libretro-common/include/compat/getopt.h create mode 100644 cores/saturn/libretro-common/include/compat/ifaddrs.h create mode 100644 cores/saturn/libretro-common/include/compat/intrinsics.h create mode 100644 cores/saturn/libretro-common/include/compat/msvc.h create mode 100644 cores/saturn/libretro-common/include/compat/msvc/stdint.h create mode 100644 cores/saturn/libretro-common/include/compat/posix_string.h create mode 100644 cores/saturn/libretro-common/include/compat/strcasestr.h create mode 100644 cores/saturn/libretro-common/include/compat/strl.h create mode 100644 cores/saturn/libretro-common/include/encodings/utf.h create mode 100644 cores/saturn/libretro-common/include/file/file_path.h create mode 100644 cores/saturn/libretro-common/include/libretro.h create mode 100644 cores/saturn/libretro-common/include/lists/dir_list.h create mode 100644 cores/saturn/libretro-common/include/lists/string_list.h create mode 100644 cores/saturn/libretro-common/include/memalign.h create mode 100644 cores/saturn/libretro-common/include/memmap.h create mode 100644 cores/saturn/libretro-common/include/retro_assert.h create mode 100644 cores/saturn/libretro-common/include/retro_common.h create mode 100644 cores/saturn/libretro-common/include/retro_common_api.h create mode 100644 cores/saturn/libretro-common/include/retro_dirent.h create mode 100644 cores/saturn/libretro-common/include/retro_endianness.h create mode 100644 cores/saturn/libretro-common/include/retro_environment.h create mode 100644 cores/saturn/libretro-common/include/retro_inline.h create mode 100644 cores/saturn/libretro-common/include/retro_math.h create mode 100644 cores/saturn/libretro-common/include/retro_miscellaneous.h create mode 100644 cores/saturn/libretro-common/include/retro_timers.h create mode 100644 cores/saturn/libretro-common/include/rthreads/async_job.h create mode 100644 cores/saturn/libretro-common/include/rthreads/rsemaphore.h create mode 100644 cores/saturn/libretro-common/include/rthreads/rthreads.h create mode 100644 cores/saturn/libretro-common/include/streams/file_stream.h create mode 100644 cores/saturn/libretro-common/include/streams/file_stream_transforms.h create mode 100644 cores/saturn/libretro-common/include/string/stdstring.h create mode 100644 cores/saturn/libretro-common/include/time/rtime.h create mode 100644 cores/saturn/libretro-common/include/vfs/vfs.h create mode 100644 cores/saturn/libretro-common/include/vfs/vfs_implementation.h create mode 100644 cores/saturn/libretro-common/include/vfs/vfs_implementation_cdrom.h create mode 100644 cores/saturn/libretro-common/lists/dir_list.c create mode 100644 cores/saturn/libretro-common/lists/string_list.c create mode 100644 cores/saturn/libretro-common/memmap/memalign.c create mode 100644 cores/saturn/libretro-common/rthreads/async_job.c create mode 100644 cores/saturn/libretro-common/rthreads/gx_pthread.h create mode 100644 cores/saturn/libretro-common/rthreads/psp_pthread.h create mode 100644 cores/saturn/libretro-common/rthreads/rsemaphore.c create mode 100644 cores/saturn/libretro-common/rthreads/rthreads.c create mode 100644 cores/saturn/libretro-common/rthreads/xenon_sdl_threads.c create mode 100644 cores/saturn/libretro-common/streams/file_stream.c create mode 100644 cores/saturn/libretro-common/streams/file_stream_transforms.c create mode 100644 cores/saturn/libretro-common/string/stdstring.c create mode 100644 cores/saturn/libretro-common/time/rtime.c create mode 100644 cores/saturn/libretro-common/vfs/vfs_implementation.c create mode 100644 cores/saturn/libretro-common/vfs/vfs_implementation.d create mode 100644 cores/saturn/libretro-common/vfs/vfs_implementation_cdrom.c create mode 100644 cores/saturn/libretro-common/vfs/vfs_implementation_uwp.cpp create mode 100644 cores/saturn/libretro.cpp create mode 100644 cores/saturn/libretro_core_options.h create mode 100644 cores/saturn/libretro_settings.cpp create mode 100644 cores/saturn/libretro_settings.h create mode 100644 cores/saturn/link.T create mode 100644 cores/saturn/mednafen/FileStream.cpp create mode 100644 cores/saturn/mednafen/FileStream.h create mode 100644 cores/saturn/mednafen/MemoryStream.cpp create mode 100644 cores/saturn/mednafen/MemoryStream.h create mode 100644 cores/saturn/mednafen/Stream.cpp create mode 100644 cores/saturn/mednafen/Stream.h create mode 100644 cores/saturn/mednafen/cdrom/CDAFReader.cpp create mode 100644 cores/saturn/mednafen/cdrom/CDAFReader.h create mode 100644 cores/saturn/mednafen/cdrom/CDAFReader_Vorbis.cpp create mode 100644 cores/saturn/mednafen/cdrom/CDAFReader_Vorbis.h create mode 100644 cores/saturn/mednafen/cdrom/CDAccess.cpp create mode 100644 cores/saturn/mednafen/cdrom/CDAccess.h create mode 100644 cores/saturn/mednafen/cdrom/CDAccess_CCD.cpp create mode 100644 cores/saturn/mednafen/cdrom/CDAccess_CCD.h create mode 100644 cores/saturn/mednafen/cdrom/CDAccess_CHD.cpp create mode 100644 cores/saturn/mednafen/cdrom/CDAccess_CHD.h create mode 100644 cores/saturn/mednafen/cdrom/CDAccess_Image.cpp create mode 100644 cores/saturn/mednafen/cdrom/CDAccess_Image.h create mode 100644 cores/saturn/mednafen/cdrom/CDUtility.cpp create mode 100644 cores/saturn/mednafen/cdrom/CDUtility.h create mode 100644 cores/saturn/mednafen/cdrom/cdromif.cpp create mode 100644 cores/saturn/mednafen/cdrom/cdromif.h create mode 100644 cores/saturn/mednafen/cdrom/dvdisaster.h create mode 100644 cores/saturn/mednafen/cdrom/edc_crc32.cpp create mode 100644 cores/saturn/mednafen/cdrom/galois-inlines.h create mode 100644 cores/saturn/mednafen/cdrom/galois.cpp create mode 100644 cores/saturn/mednafen/cdrom/l-ec.cpp create mode 100644 cores/saturn/mednafen/cdrom/lec.cpp create mode 100644 cores/saturn/mednafen/cdrom/lec.h create mode 100644 cores/saturn/mednafen/cdrom/recover-raw.cpp create mode 100644 cores/saturn/mednafen/error.cpp create mode 100644 cores/saturn/mednafen/error.h create mode 100644 cores/saturn/mednafen/general.cpp create mode 100644 cores/saturn/mednafen/general.h create mode 100644 cores/saturn/mednafen/git.h create mode 100644 cores/saturn/mednafen/hash/md5.cpp create mode 100644 cores/saturn/mednafen/hash/md5.h create mode 100644 cores/saturn/mednafen/hash/sha256.cpp create mode 100644 cores/saturn/mednafen/hash/sha256.h create mode 100644 cores/saturn/mednafen/hw_cpu/m68k/m68k.cpp create mode 100644 cores/saturn/mednafen/hw_cpu/m68k/m68k.h create mode 100644 cores/saturn/mednafen/hw_cpu/m68k/m68k_instr.inc create mode 100644 cores/saturn/mednafen/jump.h create mode 100644 cores/saturn/mednafen/masmem.h create mode 100644 cores/saturn/mednafen/math_ops.h create mode 100644 cores/saturn/mednafen/mednafen-driver.h create mode 100644 cores/saturn/mednafen/mednafen-endian.c create mode 100644 cores/saturn/mednafen/mednafen-endian.h create mode 100644 cores/saturn/mednafen/mednafen-types.h create mode 100644 cores/saturn/mednafen/mednafen.h create mode 100644 cores/saturn/mednafen/mempatcher-driver.h create mode 100644 cores/saturn/mednafen/mempatcher.cpp create mode 100644 cores/saturn/mednafen/mempatcher.h create mode 100644 cores/saturn/mednafen/settings-common.h create mode 100644 cores/saturn/mednafen/settings-driver.h create mode 100644 cores/saturn/mednafen/settings.cpp create mode 100644 cores/saturn/mednafen/settings.h create mode 100644 cores/saturn/mednafen/ss/cart.cpp create mode 100644 cores/saturn/mednafen/ss/cart.h create mode 100644 cores/saturn/mednafen/ss/cart/ar4mp.cpp create mode 100644 cores/saturn/mednafen/ss/cart/ar4mp.h create mode 100644 cores/saturn/mednafen/ss/cart/backup.cpp create mode 100644 cores/saturn/mednafen/ss/cart/backup.h create mode 100644 cores/saturn/mednafen/ss/cart/common.h create mode 100644 cores/saturn/mednafen/ss/cart/cs1ram.cpp create mode 100644 cores/saturn/mednafen/ss/cart/cs1ram.h create mode 100644 cores/saturn/mednafen/ss/cart/debug.cpp create mode 100644 cores/saturn/mednafen/ss/cart/debug.h create mode 100644 cores/saturn/mednafen/ss/cart/extram.cpp create mode 100644 cores/saturn/mednafen/ss/cart/extram.h create mode 100644 cores/saturn/mednafen/ss/cart/rom.cpp create mode 100644 cores/saturn/mednafen/ss/cart/rom.h create mode 100644 cores/saturn/mednafen/ss/cdb.cpp create mode 100644 cores/saturn/mednafen/ss/cdb.h create mode 100644 cores/saturn/mednafen/ss/db.cpp create mode 100644 cores/saturn/mednafen/ss/db.h create mode 100644 cores/saturn/mednafen/ss/input/3dpad.cpp create mode 100644 cores/saturn/mednafen/ss/input/3dpad.h create mode 100644 cores/saturn/mednafen/ss/input/common.h create mode 100644 cores/saturn/mednafen/ss/input/gamepad.cpp create mode 100644 cores/saturn/mednafen/ss/input/gamepad.h create mode 100644 cores/saturn/mednafen/ss/input/gun.cpp create mode 100644 cores/saturn/mednafen/ss/input/gun.h create mode 100644 cores/saturn/mednafen/ss/input/jpkeyboard.cpp create mode 100644 cores/saturn/mednafen/ss/input/jpkeyboard.h create mode 100644 cores/saturn/mednafen/ss/input/keyboard.cpp create mode 100644 cores/saturn/mednafen/ss/input/keyboard.h create mode 100644 cores/saturn/mednafen/ss/input/mission.cpp create mode 100644 cores/saturn/mednafen/ss/input/mission.h create mode 100644 cores/saturn/mednafen/ss/input/mouse.cpp create mode 100644 cores/saturn/mednafen/ss/input/mouse.h create mode 100644 cores/saturn/mednafen/ss/input/multitap.cpp create mode 100644 cores/saturn/mednafen/ss/input/multitap.h create mode 100644 cores/saturn/mednafen/ss/input/wheel.cpp create mode 100644 cores/saturn/mednafen/ss/input/wheel.h create mode 100644 cores/saturn/mednafen/ss/notes/HOST-CPU-INTENSIVE-GAMES create mode 100644 cores/saturn/mednafen/ss/notes/PROBLEMATIC-GAMES create mode 100644 cores/saturn/mednafen/ss/notes/REFERENCES create mode 100644 cores/saturn/mednafen/ss/notes/SCSP-FM-GAMES create mode 100644 cores/saturn/mednafen/ss/notes/WIDESCREEN-GAMES create mode 100644 cores/saturn/mednafen/ss/notes/build_sh7095s_ctable.cpp create mode 100644 cores/saturn/mednafen/ss/notes/games.txt create mode 100644 cores/saturn/mednafen/ss/notes/gen_dsp.cpp create mode 100644 cores/saturn/mednafen/ss/scsp.h create mode 100644 cores/saturn/mednafen/ss/scsp.inc create mode 100644 cores/saturn/mednafen/ss/scu.h create mode 100644 cores/saturn/mednafen/ss/scu.inc create mode 100644 cores/saturn/mednafen/ss/scu_aciv1tab.inc create mode 100644 cores/saturn/mednafen/ss/scu_actab.inc create mode 100644 cores/saturn/mednafen/ss/scu_btab.inc create mode 100644 cores/saturn/mednafen/ss/scu_dsp_common.inc create mode 100644 cores/saturn/mednafen/ss/scu_dsp_dmatab.inc create mode 100644 cores/saturn/mednafen/ss/scu_dsp_gen.cpp create mode 100644 cores/saturn/mednafen/ss/scu_dsp_gentab.inc create mode 100644 cores/saturn/mednafen/ss/scu_dsp_jmp.cpp create mode 100644 cores/saturn/mednafen/ss/scu_dsp_jmptab.inc create mode 100644 cores/saturn/mednafen/ss/scu_dsp_misc.cpp create mode 100644 cores/saturn/mednafen/ss/scu_dsp_misctab.inc create mode 100644 cores/saturn/mednafen/ss/scu_dsp_mvi.cpp create mode 100644 cores/saturn/mednafen/ss/scu_dsp_mvitab.inc create mode 100644 cores/saturn/mednafen/ss/sh7095.h create mode 100644 cores/saturn/mednafen/ss/sh7095.inc create mode 100644 cores/saturn/mednafen/ss/sh7095_idecodetab.inc create mode 100644 cores/saturn/mednafen/ss/sh7095_opdefs.inc create mode 100644 cores/saturn/mednafen/ss/sh7095_ops.inc create mode 100644 cores/saturn/mednafen/ss/sh7095s_cnorm.inc create mode 100644 cores/saturn/mednafen/ss/sh7095s_cnorm_dm.inc create mode 100644 cores/saturn/mednafen/ss/sh7095s_ctable.inc create mode 100644 cores/saturn/mednafen/ss/sh7095s_ctable_dm.inc create mode 100644 cores/saturn/mednafen/ss/sh7095s_rsu.inc create mode 100644 cores/saturn/mednafen/ss/smpc.cpp create mode 100644 cores/saturn/mednafen/ss/smpc.h create mode 100644 cores/saturn/mednafen/ss/smpc_iodevice.h create mode 100644 cores/saturn/mednafen/ss/sound.cpp create mode 100644 cores/saturn/mednafen/ss/sound.h create mode 100644 cores/saturn/mednafen/ss/ss.cpp create mode 100644 cores/saturn/mednafen/ss/ss.h create mode 100644 cores/saturn/mednafen/ss/ss_endian.h create mode 100644 cores/saturn/mednafen/ss/ss_memory.h create mode 100644 cores/saturn/mednafen/ss/vdp1.cpp create mode 100644 cores/saturn/mednafen/ss/vdp1.h create mode 100644 cores/saturn/mednafen/ss/vdp1_common.h create mode 100644 cores/saturn/mednafen/ss/vdp1_line.cpp create mode 100644 cores/saturn/mednafen/ss/vdp1_poly.cpp create mode 100644 cores/saturn/mednafen/ss/vdp1_sprite.cpp create mode 100644 cores/saturn/mednafen/ss/vdp2.cpp create mode 100644 cores/saturn/mednafen/ss/vdp2.h create mode 100644 cores/saturn/mednafen/ss/vdp2_common.h create mode 100644 cores/saturn/mednafen/ss/vdp2_render.cpp create mode 100644 cores/saturn/mednafen/ss/vdp2_render.h create mode 100644 cores/saturn/mednafen/state.cpp create mode 100644 cores/saturn/mednafen/state.h create mode 100644 cores/saturn/mednafen/tremor/COPYING create mode 100644 cores/saturn/mednafen/tremor/README create mode 100644 cores/saturn/mednafen/tremor/asm_arm.h create mode 100644 cores/saturn/mednafen/tremor/backends.h create mode 100644 cores/saturn/mednafen/tremor/bitwise.c create mode 100644 cores/saturn/mednafen/tremor/block.c create mode 100644 cores/saturn/mednafen/tremor/block.h create mode 100644 cores/saturn/mednafen/tremor/codebook.c create mode 100644 cores/saturn/mednafen/tremor/codebook.h create mode 100644 cores/saturn/mednafen/tremor/codec_internal.h create mode 100644 cores/saturn/mednafen/tremor/floor0.c create mode 100644 cores/saturn/mednafen/tremor/floor1.c create mode 100644 cores/saturn/mednafen/tremor/framing.c create mode 100644 cores/saturn/mednafen/tremor/info.c create mode 100644 cores/saturn/mednafen/tremor/ivorbiscodec.h create mode 100644 cores/saturn/mednafen/tremor/ivorbisfile.h create mode 100644 cores/saturn/mednafen/tremor/lsp_lookup.h create mode 100644 cores/saturn/mednafen/tremor/mapping0.c create mode 100644 cores/saturn/mednafen/tremor/mdct.c create mode 100644 cores/saturn/mednafen/tremor/mdct.h create mode 100644 cores/saturn/mednafen/tremor/mdct_lookup.h create mode 100644 cores/saturn/mednafen/tremor/misc.h create mode 100644 cores/saturn/mednafen/tremor/ogg.h create mode 100644 cores/saturn/mednafen/tremor/os.h create mode 100644 cores/saturn/mednafen/tremor/registry.c create mode 100644 cores/saturn/mednafen/tremor/registry.h create mode 100644 cores/saturn/mednafen/tremor/res012.c create mode 100644 cores/saturn/mednafen/tremor/sharedbook.c create mode 100644 cores/saturn/mednafen/tremor/synthesis.c create mode 100644 cores/saturn/mednafen/tremor/vorbisfile.c create mode 100644 cores/saturn/mednafen/tremor/window.c create mode 100644 cores/saturn/mednafen/tremor/window.h create mode 100644 cores/saturn/mednafen/tremor/window_lookup.h create mode 100644 cores/saturn/mednafen/video/Deinterlacer.cpp create mode 100644 cores/saturn/mednafen/video/Deinterlacer.h create mode 100644 cores/saturn/mednafen/video/surface.cpp create mode 100644 cores/saturn/mednafen/video/surface.h create mode 100644 retro/data/stable/VirtuaFighter2-Saturn/AkiraVsLau.state create mode 100644 retro/data/stable/VirtuaFighter2-Saturn/data.json create mode 100644 retro/data/stable/VirtuaFighter2-Saturn/metadata.json create mode 100644 retro/data/stable/VirtuaFighter2-Saturn/rom.sha create mode 100644 retro/data/stable/VirtuaFighter2-Saturn/scenario.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 47380e093..d1619a6bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -226,6 +226,7 @@ endif() add_core(gba mgba) add_core(pce mednafen_pce_fast) add_core(32x picodrive) +add_core(saturn mednafen_saturn) set(CLEAN_CORES) diff --git a/cores/saturn.json b/cores/saturn.json new file mode 100644 index 000000000..23cb2c53d --- /dev/null +++ b/cores/saturn.json @@ -0,0 +1,15 @@ +{ + "Saturn": { + "lib": "mednafen_saturn", + "ext": ["chd"], + "keybinds": ["X", "Z", "TAB", "ENTER", "UP", "DOWN", "LEFT", "RIGHT", "C", "A", "S", "D"], + "buttons": ["B", "A", "MODE", "START", "UP", "DOWN", "LEFT", "RIGHT", "C", "Y", "X", "Z"], + "types": ["|u1", ">u2", ">u4", "|i1", ">i2", ">i4", "|d1", ">d2", ">d4", "d6", ">d8", ">n4", ">n6", ">n8"], + "overlay": ["=", ">", 2], + "actions": [ + [[], ["UP"], ["DOWN"]], + [[], ["LEFT"], ["RIGHT"]], + [[], ["A"], ["B"], ["C"], ["X"], ["Y"], ["Z"], ["A", "B"], ["B", "C"], ["A", "X"], ["B", "Y"], ["C", "Z"], ["X", "Y"], ["Y", "Z"]] + ] + } +} diff --git a/cores/saturn/.gitignore b/cores/saturn/.gitignore new file mode 100644 index 000000000..9a9966130 --- /dev/null +++ b/cores/saturn/.gitignore @@ -0,0 +1,7 @@ +*.o +*.so +*.dll +*.pdb +*.lib +*.exp +*.manifest diff --git a/cores/saturn/.gitlab-ci.yml b/cores/saturn/.gitlab-ci.yml new file mode 100644 index 000000000..5b8be1247 --- /dev/null +++ b/cores/saturn/.gitlab-ci.yml @@ -0,0 +1,148 @@ +# DESCRIPTION: GitLab CI/CD for libRetro (NOT FOR GitLab-proper) + +############################################################################## +################################# BOILERPLATE ################################ +############################################################################## + +# Core definitions +.core-defs: + variables: + JNI_PATH: . + CORENAME: mednafen_saturn + +# Inclusion templates, required for the build to work +include: + ################################## DESKTOPS ################################ + # Windows 64-bit + - project: 'libretro-infrastructure/ci-templates' + file: '/windows-x64-mingw.yml' + + # Windows 32-bit + - project: 'libretro-infrastructure/ci-templates' + file: '/windows-i686-mingw.yml' + + # Linux 64-bit + - project: 'libretro-infrastructure/ci-templates' + file: '/linux-x64.yml' + + # Linux 32-bit + - project: 'libretro-infrastructure/ci-templates' + file: '/linux-i686.yml' + + # MacOS 64-bit + - project: 'libretro-infrastructure/ci-templates' + file: '/osx-x64.yml' + + # MacOS ARM 64-bit + - project: 'libretro-infrastructure/ci-templates' + file: '/osx-arm64.yml' + + ################################## CELLULAR ################################ + # Android + - project: 'libretro-infrastructure/ci-templates' + file: '/android-jni.yml' + + # iOS + - project: 'libretro-infrastructure/ci-templates' + file: '/ios-arm64.yml' + + # iOS (armv7) + - project: 'libretro-infrastructure/ci-templates' + file: '/ios9.yml' + + ################################## CONSOLES ################################ + # tvOS (AppleTV) + - project: 'libretro-infrastructure/ci-templates' + file: '/tvos-arm64.yml' + + #################################### MISC ################################## + +# Stages for building +stages: + - build-prepare + - build-shared + - build-static + +############################################################################## +#################################### STAGES ################################## +############################################################################## +# +################################### DESKTOPS ################################# +# Windows 64-bit +libretro-build-windows-x64: + extends: + - .libretro-windows-x64-mingw-make-default + - .core-defs + +# Windows 32-bit +libretro-build-windows-i686: + extends: + - .libretro-windows-i686-mingw-make-default + - .core-defs + +# Linux 64-bit +libretro-build-linux-x64: + extends: + - .libretro-linux-x64-make-default + - .core-defs + +# Linux 32-bit +libretro-build-linux-i686: + extends: + - .libretro-linux-i686-make-default + - .core-defs + +# MacOS 64-bit +libretro-build-osx-x64: + extends: + - .libretro-osx-x64-make-default + - .core-defs + +# MacOS ARM 64-bit +libretro-build-osx-arm64: + extends: + - .libretro-osx-arm64-make-default + - .core-defs + +################################### CELLULAR ################################# +# Android ARMv7a +android-armeabi-v7a: + extends: + - .libretro-android-jni-armeabi-v7a + - .core-defs + +# Android ARMv8a +android-arm64-v8a: + extends: + - .libretro-android-jni-arm64-v8a + - .core-defs + +# Android 64-bit x86 +android-x86_64: + extends: + - .libretro-android-jni-x86_64 + - .core-defs + +# Android 32-bit x86 +android-x86: + extends: + - .libretro-android-jni-x86 + - .core-defs + +# iOS +libretro-build-ios-arm64: + extends: + - .libretro-ios-arm64-make-default + - .core-defs + +# iOS (armv7) [iOS 9 and up] +libretro-build-ios9: + extends: + - .libretro-ios9-make-default + - .core-defs + +# tvOS +libretro-build-tvos-arm64: + extends: + - .libretro-tvos-arm64-make-default + - .core-defs diff --git a/cores/saturn/.travis.yml b/cores/saturn/.travis.yml new file mode 100644 index 000000000..ddf07d7ef --- /dev/null +++ b/cores/saturn/.travis.yml @@ -0,0 +1,32 @@ +language: generic +os: linux +dist: trusty +sudo: required +addons: + apt: + packages: + - g++-7 + sources: + - ubuntu-toolchain-r-test +env: + global: + - CORE=mednafen_saturn + - COMPILER_NAME=gcc CXX=g++-7 CC=gcc-7 + matrix: + - PLATFORM=linux_x64 +before_script: + - pwd + - mkdir -p ~/bin + - ln -s /usr/bin/gcc-7 ~/bin/gcc + - ln -s /usr/bin/g++-7 ~/bin/g++ + - ln -s /usr/bin/cpp-7 ~/bin/cpp + - export PATH=~/bin:$PATH + - ls -l ~/bin + - echo $PATH + - g++-7 --version + - g++ --version +script: + - cd ~/ + - git clone --depth=50 https://github.com/libretro/libretro-super + - cd libretro-super/travis + - ./build.sh diff --git a/cores/saturn/COPYING b/cores/saturn/COPYING new file mode 100644 index 000000000..afd5a9471 --- /dev/null +++ b/cores/saturn/COPYING @@ -0,0 +1,341 @@ + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/cores/saturn/Makefile b/cores/saturn/Makefile new file mode 100644 index 000000000..f2593d6a7 --- /dev/null +++ b/cores/saturn/Makefile @@ -0,0 +1,495 @@ +DEBUG = 0 +HAVE_OPENGL = 0 +HAVE_CHD = 1 +HAVE_CDROM = 0 + +CORE_DIR := . + +SPACE := +SPACE := $(SPACE) $(SPACE) +BACKSLASH := +BACKSLASH := \$(BACKSLASH) +filter_out1 = $(filter-out $(firstword $1),$1) +filter_out2 = $(call filter_out1,$(call filter_out1,$1)) + +ifeq ($(platform),) + platform = unix + ifeq ($(shell uname -s),) + platform = win + else ifneq ($(findstring Darwin,$(shell uname -s)),) + platform = osx + arch = intel + ifeq ($(shell uname -p),powerpc) + arch = ppc + endif + else ifneq ($(findstring MINGW,$(shell uname -s)),) + platform = win + endif +else ifneq (,$(findstring armv,$(platform))) + override platform += unix +endif + +ifneq ($(platform), osx) + ifeq ($(findstring Haiku,$(shell uname -s)),) + PTHREAD_FLAGS = -pthread + endif +endif + +prefix := /usr +libdir := $(prefix)/lib + +LIBRETRO_INSTALL_DIR := libretro + +NEED_CD = 1 +NEED_TREMOR = 1 +NEED_BPP = 32 +NEED_DEINTERLACER = 1 +NEED_THREADING = 1 +CORE_DEFINE := +TARGET_NAME := mednafen_saturn + +ifeq ($(HAVE_OPENGL),1) + TARGET_NAME := mednafen_saturn_hw +endif + +GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)" +ifneq ($(GIT_VERSION)," unknown") + CXXFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" +endif + +# Unix +ifneq (,$(findstring unix,$(platform))) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + SHARED := -shared -Wl,--no-undefined -Wl,--version-script=link.T + ifneq (,$(findstring Haiku,$(shell uname -s))) + CXXFLAGS += -fpermissive + PTHREAD_FLAGS = -lpthread + endif + LDFLAGS += $(PTHREAD_FLAGS) + FLAGS += $(PTHREAD_FLAGS) + ifeq ($(HAVE_OPENGL),1) + ifneq (,$(findstring gles,$(platform))) + GLES = 1 + GL_LIB := -lGLESv2 + else + GL_LIB := -lGL + endif + endif + ifneq ($(findstring Linux,$(shell uname -s)),) + HAVE_CDROM = 1 + endif + +# OS X +else ifeq ($(platform), osx) + TARGET := $(TARGET_NAME)_libretro.dylib + fpic := -fPIC + SHARED := -dynamiclib + LDFLAGS += $(PTHREAD_FLAGS) + FLAGS += $(PTHREAD_FLAGS) + ifeq ($(arch),ppc) + ENDIANNESS_DEFINES := -DMSB_FIRST + OLD_GCC := 1 + endif + OSXVER = `sw_vers -productVersion | cut -d. -f 2` + OSX_LT_MAVERICKS = `(( $(OSXVER) <= 9)) && echo "YES"` + ifeq ($(OSX_LT_MAVERICKS),"YES") + fpic += -mmacosx-version-min=10.5 + endif + ifeq ($(HAVE_OPENGL),1) + GL_LIB := -framework OpenGL + endif + ifeq ($(CROSS_COMPILE),1) + TARGET_RULE = -target $(LIBRETRO_APPLE_PLATFORM) -isysroot $(LIBRETRO_APPLE_ISYSROOT) + CFLAGS += $(TARGET_RULE) + CPPFLAGS += $(TARGET_RULE) + CXXFLAGS += $(TARGET_RULE) + LDFLAGS += $(TARGET_RULE) + endif + +# iOS +else ifneq (,$(findstring ios,$(platform))) + TARGET := $(TARGET_NAME)_libretro_ios.dylib + fpic := -fPIC + SHARED := -dynamiclib + LDFLAGS += $(PTHREAD_FLAGS) + FLAGS += $(PTHREAD_FLAGS) -DHAVE_UNISTD_H + ifeq ($(IOSSDK),) + IOSSDK := $(shell xcrun -sdk iphoneos -show-sdk-path) + endif + ifeq ($(HAVE_OPENGL),1) + GL_LIB := -framework OpenGLES + endif + ifeq ($(platform), ios-arm64) + CC = cc -arch arm64 -isysroot $(IOSSDK) + CXX = c++ -arch arm64 -isysroot $(IOSSDK) + else + CC = cc -arch armv7 -isysroot $(IOSSDK) + CXX = c++ -arch armv7 -isysroot $(IOSSDK) + endif + IPHONEMINVER := + ifeq ($(platform),$(filter $(platform),ios9 ios-arm64)) + IPHONEMINVER = -miphoneos-version-min=8.0 + else + IPHONEMINVER = -miphoneos-version-min=5.0 + endif + LDFLAGS += $(IPHONEMINVER) + FLAGS += $(IPHONEMINVER) + CC += $(IPHONEMINVER) + CXX += $(IPHONEMINVER) + +# tvOS +else ifeq ($(platform), tvos-arm64) + TARGET := $(TARGET_NAME)_libretro_tvos.dylib + fpic := -fPIC + SHARED := -dynamiclib + FLAGS += -DHAVE_UNISTD_H + +ifeq ($(IOSSDK),) + IOSSDK := $(shell xcodebuild -version -sdk appletvos Path) +endif + CC = clang -arch arm64 -isysroot $(IOSSDK) + CXX = clang++ -arch arm64 -isysroot $(IOSSDK) + +# QNX +else ifeq ($(platform), qnx) + TARGET := $(TARGET_NAME)_libretro_$(platform).so + fpic := -fPIC + SHARED := -lcpp -lm -shared -Wl,--no-undefined -Wl,--version-script=link.T + #LDFLAGS += $(PTHREAD_FLAGS) + #FLAGS += $(PTHREAD_FLAGS) + CC = qcc -Vgcc_ntoarmv7le + CXX = QCC -Vgcc_ntoarmv7le_cpp + AR = QCC -Vgcc_ntoarmv7le + FLAGS += -D__BLACKBERRY_QNX__ -marm -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=softfp + ifeq ($(HAVE_OPENGL),1) + GL_LIB := -lGLESv2 + endif + +# Lightweight PS3 Homebrew SDK +else ifeq ($(platform), psl1ght) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(PS3DEV)/ppu/bin/ppu-gcc$(EXE_EXT) + CXX = $(PS3DEV)/ppu/bin/ppu-g++$(EXE_EXT) + AR = $(PS3DEV)/ppu/bin/ppu-ar$(EXE_EXT) + ENDIANNESS_DEFINES := -DMSB_FIRST + STATIC_LINKING = 1 + +# PSP +else ifeq ($(platform), psp1) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = psp-gcc$(EXE_EXT) + CXX = psp-g++$(EXE_EXT) + AR = psp-ar$(EXE_EXT) + FLAGS += -DPSP -G0 + STATIC_LINKING = 1 + EXTRA_INCLUDES := -I$(shell psp-config --pspsdk-path)/include + +# Vita +else ifeq ($(platform), vita) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = arm-vita-eabi-gcc$(EXE_EXT) + CXX = arm-vita-eabi-g++$(EXE_EXT) + AR = arm-vita-eabi-ar$(EXE_EXT) + FLAGS += -DVITA + STATIC_LINKING = 1 + +# Xbox 360 +else ifeq ($(platform), xenon) + TARGET := $(TARGET_NAME)_libretro_xenon360.a + CC = xenon-gcc$(EXE_EXT) + CXX = xenon-g++$(EXE_EXT) + AR = xenon-ar$(EXE_EXT) + ENDIANNESS_DEFINES += -D__LIBXENON__ -m32 -D__ppc__ -DMSB_FIRST + LIBS := $(PTHREAD_FLAGS) + STATIC_LINKING = 1 + +# Nintendo Game Cube / Nintendo Wii +else ifneq (,$(filter $(platform),ngc wii)) + ifeq ($(platform), ngc) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + ENDIANNESS_DEFINES += -DHW_DOL + else ifeq ($(platform), wii) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + ENDIANNESS_DEFINES += -DHW_RVL + endif + ENDIANNESS_DEFINES += -DGEKKO -mrvl -mcpu=750 -meabi -mhard-float -DMSB_FIRST + CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) + CXX = $(DEVKITPPC)/bin/powerpc-eabi-g++$(EXE_EXT) + AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) + EXTRA_INCLUDES := -I$(DEVKITPRO)/libogc/include + STATIC_LINKING = 1 + +# GCW0 +else ifeq ($(platform), gcw0) + TARGET := $(TARGET_NAME)_libretro.so + CC = /opt/gcw0-toolchain/usr/bin/mipsel-linux-gcc + CXX = /opt/gcw0-toolchain/usr/bin/mipsel-linux-g++ + AR = /opt/gcw0-toolchain/usr/bin/mipsel-linux-ar + fpic := -fPIC + SHARED := -shared -Wl,--no-undefined -Wl,--version-script=link.T + LDFLAGS += $(PTHREAD_FLAGS) + FLAGS += $(PTHREAD_FLAGS) + FLAGS += -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float + GLES = 1 + GL_LIB := -lGLESv2 + +# Emscripten +else ifeq ($(platform), emscripten) + TARGET := $(TARGET_NAME)_libretro_$(platform).bc + STATIC_LINKING = 1 + FLAGS += $(PTHREAD_FLAGS) -Dretro_fopen=gg_retro_fopen\ + -Dmain=gg_main\ + -Dretro_fclose=gg_retro_fclose\ + -Dretro_fseek=gg_retro_fseek\ + -Dretro_fread=gg_retro_fread\ + -Dretro_fwrite=gg_retro_fwrite\ + -Dscond_broadcast=gg_scond_broadcast\ + -Dscond_wait_timeout=gg_scond_wait_timeout\ + -Dscond_signal=gg_scond_signal\ + -Dscond_wait=gg_scond_wait\ + -Dscond_free=gg_scond_free\ + -Dscond_new=gg_scond_new\ + -Dslock_unlock=gg_slock_unlock\ + -Dslock_lock=gg_slock_lock\ + -Dslock_free=gg_slock_free\ + -Dslock_new=gg_slock_new\ + -Dsthread_join=gg_sthread_join\ + -Dsthread_detach=gg_sthread_detach\ + -Dsthread_create=gg_sthread_create\ + -Dscond=gg_scond\ + -Dslock=gg_slock\ + -Drglgen_symbol_map=mupen_rglgen_symbol_map \ + -Dmain_exit=mupen_main_exit \ + -Dadler32=mupen_adler32 \ + -Drglgen_resolve_symbols_custom=mupen_rglgen_resolve_symbols_custom \ + -Drglgen_resolve_symbols=mupen_rglgen_resolve_symbols \ + -Dsinc_resampler=mupen_sinc_resampler \ + -Dnearest_resampler=mupen_nearest_resampler \ + -DCC_resampler=mupen_CC_resampler \ + -Daudio_resampler_driver_find_handle=mupen_audio_resampler_driver_find_handle \ + -Daudio_resampler_driver_find_ident=mupen_audio_resampler_driver_find_ident \ + -Drarch_resampler_realloc=mupen_rarch_resampler_realloc \ + -Daudio_convert_s16_to_float_C=mupen_audio_convert_s16_to_float_C \ + -Daudio_convert_float_to_s16_C=mupen_audio_convert_float_to_s16_C \ + -Daudio_convert_init_simd=mupen_audio_convert_init_simd \ + -Dfilestream_open=gg_filestream_open\ + -Dfilestream_get_fd=gg_filestream_get_fd\ + -Dfilestream_read=gg_filestream_read\ + -Dfilestream_seek=gg_filestream_seek\ + -Dfilestream_close=gg_filestream_close\ + -Dfilestream_tell=gg_filestream_tell\ + -Dfilestream_read_file=gg_filestream_read_file\ + -Dfilestream_write_file=gg_filestream_write_file\ + -Dfilestream_write=gg_filestream_write\ + -Dfilestream_rewind=gg_filestream_rewind\ + -Dfilestream_putc=gg_filestream_putc\ + -Dstring_is_empty=gg_string_is_empty\ + -Dstring_is_equal=gg_string_is_equal\ + -Dstring_to_upper=gg_string_to_upper\ + -Dstring_to_lower=gg_string_to_lower\ + -Dstring_ucwords=gg_string_ucwords\ + -Dstring_replace_substring=gg_string_replace_substring\ + -Dstring_trim_whitespace_left=gg_string_trim_whitespace_left\ + -Dstring_trim_whitespace_right=gg_string_trim_whitespace_right\ + -Dstring_trim_whitespace_left=gg_string_trim_whitespace_left\ + -Dstring_trim_whitespace=gg_string_trim_whitespace\ + -Dsthread_isself=gg_sthread_isself\ + -Dstring_is_equal_noncase=gg_string_is_equal_noncase\ + -Dmkdir_norecurse=gg_mkdir_norecurse + +ifeq ($(HAVE_OPENGL),1) + ifneq (,$(findstring gles,$(platform))) + GLES = 1 + GL_LIB := -lGLESv2 + else + GL_LIB := -lGL + endif +endif + +# Windows MSVC 2017 all architectures +else ifneq (,$(findstring windows_msvc2017,$(platform))) + + NO_GCC := 1 + FLAGS += -DNOMINMAX -D__PIC__ + + PlatformSuffix = $(subst windows_msvc2017_,,$(platform)) + ifneq (,$(findstring desktop,$(PlatformSuffix))) + WinPartition = desktop + MSVC2017CompileFlags = -DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -FS + LDFLAGS += -MANIFEST -LTCG:incremental -NXCOMPAT -DYNAMICBASE -DEBUG -OPT:REF -INCREMENTAL:NO -SUBSYSTEM:WINDOWS -MANIFESTUAC:"level='asInvoker' uiAccess='false'" -OPT:ICF -ERRORREPORT:PROMPT -NOLOGO -TLBID:1 + LIBS += kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib + else ifneq (,$(findstring uwp,$(PlatformSuffix))) + WinPartition = uwp + MSVC2017CompileFlags = -DWINAPI_FAMILY=WINAPI_FAMILY_APP -DWINDLL -D_UNICODE -DUNICODE -DWRL_NO_DEFAULT_LIB -FS + LDFLAGS += -APPCONTAINER -NXCOMPAT -DYNAMICBASE -MANIFEST:NO -LTCG -OPT:REF -SUBSYSTEM:CONSOLE -MANIFESTUAC:NO -OPT:ICF -ERRORREPORT:PROMPT -NOLOGO -TLBID:1 -DEBUG:FULL -WINMD:NO + LIBS += WindowsApp.lib + endif + + TargetArchMoniker = $(subst $(WinPartition)_,,$(PlatformSuffix)) + + ifneq (,$(findstring x64,$(TargetArchMoniker))) + MSVC2017CompileFlags += -D__x86_64__ + endif + ifneq (,$(findstring x86,$(TargetArchMoniker))) + MSVC2017CompileFlags += -D__i386__ + endif + ifneq (,$(findstring arm,$(TargetArchMoniker))) + MSVC2017CompileFlags += -D__arm__ + endif + + CFLAGS += $(MSVC2017CompileFlags) + CXXFLAGS += $(MSVC2017CompileFlags) -EHsc + + CC = cl.exe + CXX = cl.exe + LD = link.exe + + reg_query = $(call filter_out2,$(subst $2,,$(shell reg query "$2" -v "$1" 2>nul))) + fix_path = $(subst $(SPACE),\ ,$(subst \,/,$1)) + + ProgramFiles86w := $(shell cmd /c "echo %PROGRAMFILES(x86)%") + ProgramFiles86 := $(shell cygpath "$(ProgramFiles86w)") + + WindowsSdkDir ?= $(call reg_query,InstallationFolder,HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0) + WindowsSdkDir ?= $(call reg_query,InstallationFolder,HKEY_CURRENT_USER\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0) + WindowsSdkDir ?= $(call reg_query,InstallationFolder,HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0) + WindowsSdkDir ?= $(call reg_query,InstallationFolder,HKEY_CURRENT_USER\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0) + WindowsSdkDir := $(WindowsSdkDir) + + WindowsSDKVersion ?= $(firstword $(foreach folder,$(subst $(subst \,/,$(WindowsSdkDir)Include/),,$(wildcard $(call fix_path,$(WindowsSdkDir)Include\*))),$(if $(wildcard $(call fix_path,$(WindowsSdkDir)Include/$(folder)/um/Windows.h)),$(folder),)))$(BACKSLASH) + WindowsSDKVersion := $(WindowsSDKVersion) + + VsInstallBuildTools = $(ProgramFiles86)/Microsoft Visual Studio/2017/BuildTools + VsInstallEnterprise = $(ProgramFiles86)/Microsoft Visual Studio/2017/Enterprise + VsInstallProfessional = $(ProgramFiles86)/Microsoft Visual Studio/2017/Professional + VsInstallCommunity = $(ProgramFiles86)/Microsoft Visual Studio/2017/Community + + VsInstallRoot ?= $(shell if [ -d "$(VsInstallBuildTools)" ]; then echo "$(VsInstallBuildTools)"; fi) + ifeq ($(VsInstallRoot), ) + VsInstallRoot = $(shell if [ -d "$(VsInstallEnterprise)" ]; then echo "$(VsInstallEnterprise)"; fi) + endif + ifeq ($(VsInstallRoot), ) + VsInstallRoot = $(shell if [ -d "$(VsInstallProfessional)" ]; then echo "$(VsInstallProfessional)"; fi) + endif + ifeq ($(VsInstallRoot), ) + VsInstallRoot = $(shell if [ -d "$(VsInstallCommunity)" ]; then echo "$(VsInstallCommunity)"; fi) + endif + VsInstallRoot := $(VsInstallRoot) + + VcCompilerToolsVer := $(shell cat "$(VsInstallRoot)/VC/Auxiliary/Build/Microsoft.VCToolsVersion.default.txt" | grep -o '[0-9\.]*') + VcCompilerToolsDir := $(VsInstallRoot)/VC/Tools/MSVC/$(VcCompilerToolsVer) + + WindowsSDKSharedIncludeDir := $(shell cygpath -w "$(WindowsSdkDir)\Include\$(WindowsSDKVersion)\shared") + WindowsSDKUCRTIncludeDir := $(shell cygpath -w "$(WindowsSdkDir)\Include\$(WindowsSDKVersion)\ucrt") + WindowsSDKUMIncludeDir := $(shell cygpath -w "$(WindowsSdkDir)\Include\$(WindowsSDKVersion)\um") + WindowsSDKUCRTLibDir := $(shell cygpath -w "$(WindowsSdkDir)\Lib\$(WindowsSDKVersion)\ucrt\$(TargetArchMoniker)") + WindowsSDKUMLibDir := $(shell cygpath -w "$(WindowsSdkDir)\Lib\$(WindowsSDKVersion)\um\$(TargetArchMoniker)") + + # For some reason the HostX86 compiler doesn't like compiling for x64 + # ("no such file" opening a shared library), and vice-versa. + # Work around it for now by using the strictly x86 compiler for x86, and x64 for x64. + # NOTE: What about ARM? + ifneq (,$(findstring x64,$(TargetArchMoniker))) + VCCompilerToolsBinDir := $(VcCompilerToolsDir)\bin\HostX64 + else + VCCompilerToolsBinDir := $(VcCompilerToolsDir)\bin\HostX86 + endif + + PATH := $(shell IFS=$$'\n'; cygpath "$(VCCompilerToolsBinDir)/$(TargetArchMoniker)"):$(PATH) + PATH := $(PATH):$(shell IFS=$$'\n'; cygpath "$(VsInstallRoot)/Common7/IDE") + INCLUDE := $(shell IFS=$$'\n'; cygpath -w "$(VcCompilerToolsDir)/include") + LIB := $(shell IFS=$$'\n'; cygpath -w "$(VcCompilerToolsDir)/lib/$(TargetArchMoniker)") + + export INCLUDE := $(INCLUDE);$(WindowsSDKSharedIncludeDir);$(WindowsSDKUCRTIncludeDir);$(WindowsSDKUMIncludeDir) + export LIB := $(LIB);$(WindowsSDKUCRTLibDir);$(WindowsSDKUMLibDir) + TARGET := $(TARGET_NAME)_libretro.dll + LDFLAGS += -DLL + +# Windows +else + TARGET := $(TARGET_NAME)_libretro.dll + CC ?= gcc + CXX ?= g++ + SHARED := -shared -Wl,--no-undefined -Wl,--version-script=link.T + LDFLAGS += -static-libgcc -static-libstdc++ -lwinmm + ifeq ($(HAVE_OPENGL),1) + GL_LIB := -lopengl32 + endif + HAVE_CDROM = 1 + +endif + +include Makefile.common + +WARNINGS := -Wall \ + -Wno-sign-compare \ + -Wno-unused-variable \ + -Wno-unused-function \ + -Wno-uninitialized \ + $(NEW_GCC_WARNING_FLAGS) \ + -Wno-strict-aliasing + +#EXTRA_GCC_FLAGS := -funroll-loops + +ifeq ($(NO_GCC),1) + WARNINGS := +endif + +OBJECTS := $(SOURCES_CXX:.cpp=.o) $(SOURCES_C:.c=.o) + +all: $(TARGET) + +ifeq ($(DEBUG),0) + FLAGS += -O2 $(EXTRA_GCC_FLAGS) +else + FLAGS += -O0 -g +endif + +LDFLAGS += $(fpic) $(SHARED) +FLAGS += $(fpic) $(NEW_GCC_FLAGS) +FLAGS += $(INCFLAGS) + +FLAGS += $(ENDIANNESS_DEFINES) $(WARNINGS) $(CORE_DEFINE) -DSTDC_HEADERS -D__STDC_LIMIT_MACROS -D__LIBRETRO__ -D_LOW_ACCURACY_ $(EXTRA_INCLUDES) $(SOUND_DEFINE) -D__STDC_CONSTANT_MACROS + +CXXFLAGS += $(FLAGS) +CFLAGS += $(FLAGS) + +ifeq (,$(findstring msvc,$(platform))) + CXXFLAGS += -std=c++11 +endif + +OBJOUT = -o +LINKOUT = -o + +ifneq (,$(findstring msvc,$(platform))) + OBJOUT = -Fo + LINKOUT = -out: + LD = link.exe +else + LD = $(CXX) +endif + +$(TARGET): $(OBJECTS) +ifeq ($(STATIC_LINKING), 1) + $(AR) rcs $@ $(OBJECTS) +else + $(LD) $(LINKOUT)$@ $^ $(LDFLAGS) $(GL_LIB) $(LIBS) +endif + +%.o: %.cpp + $(CXX) -c $(OBJOUT)$@ $< $(CXXFLAGS) + +%.o: %.c + $(CC) -c $(OBJOUT)$@ $< $(CFLAGS) + +clean: + rm -f $(TARGET) $(OBJECTS) + +install: + install -D -m 755 $(TARGET) $(DESTDIR)$(libdir)/$(LIBRETRO_INSTALL_DIR)/$(TARGET) + +uninstall: + rm $(DESTDIR)$(libdir)/$(LIBRETRO_INSTALL_DIR)/$(TARGET) + +.PHONY: clean install uninstall diff --git a/cores/saturn/Makefile.common b/cores/saturn/Makefile.common new file mode 100644 index 000000000..221fa4137 --- /dev/null +++ b/cores/saturn/Makefile.common @@ -0,0 +1,194 @@ +SOURCES_CXX := +SOURCES_C := + +DEPS_DIR := $(CORE_DIR)/deps +LIBRETRO_COMM_DIR := $(CORE_DIR)/libretro-common +MEDNAFEN_DIR := $(CORE_DIR)/mednafen +CORE_EMU_DIR := $(MEDNAFEN_DIR)/ss +CDROM_DIR := $(MEDNAFEN_DIR)/cdrom + +ZLIB_INCFLAGS := -I$(DEPS_DIR)/zlib +INCFLAGS := -I$(CORE_DIR) -I$(MEDNAFEN_DIR) -I$(MEDNAFEN_DIR)/include -I$(MEDNAFEN_DIR)/intl -I$(MEDNAFEN_DIR)/hw_sound -I$(MEDNAFEN_DIR)/hw_cpu -I$(MEDNAFEN_DIR)/hw_misc -I$(LIBRETRO_COMM_DIR)/include + +ZLIB_SOURCES_C := $(DEPS_DIR)/zlib/adler32.c \ + $(DEPS_DIR)/zlib/crc32.c \ + $(DEPS_DIR)/zlib/inffast.c \ + $(DEPS_DIR)/zlib/inflate.c \ + $(DEPS_DIR)/zlib/inftrees.c + +ifeq ($(NEED_THREADING), 1) + FLAGS += -DWANT_THREADING -DHAVE_THREADS +endif + +ifeq ($(NEED_DEINTERLACER), 1) + FLAGS += -DNEED_DEINTERLACER +endif + +ifeq ($(NEED_BPP), 32) +FLAGS += -DWANT_32BPP +endif + +ifeq ($(NO_COMPUTED_GOTO), 1) +FLAGS += -DNO_COMPUTED_GOTO +endif + +ifeq ($(NEED_CD), 1) + FLAGS += -DNEED_CD +endif + +ifeq ($(HAVE_CHD), 1) + FLAGS += -DHAVE_CHD -D_7ZIP_ST +ifeq ($(SYSTEM_LIBCHDR), 1) + INCFLAGS += $(shell pkg-config --cflags libchdr) + LIBS += $(shell pkg-config --libs libchdr) +else + INCFLAGS += -I$(DEPS_DIR)/crypto \ + -I$(DEPS_DIR)/lzma/C \ + -I$(DEPS_DIR)/libchdr \ + -I$(DEPS_DIR)/libchdr/include \ + -I$(DEPS_DIR)/libchdr/include/libchdr +endif +endif + +ifeq ($(NEED_TREMOR), 1) + FLAGS += -DNEED_TREMOR +endif + +SOURCES_CXX += \ + $(CORE_EMU_DIR)/db.cpp \ + $(CORE_EMU_DIR)/cdb.cpp \ + $(CORE_EMU_DIR)/sound.cpp \ + $(CORE_EMU_DIR)/cart.cpp \ + $(CORE_EMU_DIR)/ss.cpp \ + $(CORE_EMU_DIR)/scu_dsp_gen.cpp \ + $(CORE_EMU_DIR)/scu_dsp_mvi.cpp \ + $(CORE_EMU_DIR)/scu_dsp_jmp.cpp \ + $(CORE_EMU_DIR)/scu_dsp_misc.cpp \ + $(CORE_EMU_DIR)/vdp1.cpp \ + $(CORE_EMU_DIR)/vdp1_line.cpp \ + $(CORE_EMU_DIR)/vdp1_sprite.cpp \ + $(CORE_EMU_DIR)/vdp1_poly.cpp \ + $(CORE_EMU_DIR)/vdp2.cpp \ + $(CORE_EMU_DIR)/vdp2_render.cpp \ + $(CORE_EMU_DIR)/smpc.cpp \ + $(CORE_EMU_DIR)/input/gamepad.cpp \ + $(CORE_EMU_DIR)/input/gun.cpp \ + $(CORE_EMU_DIR)/input/3dpad.cpp \ + $(CORE_EMU_DIR)/input/mouse.cpp \ + $(CORE_EMU_DIR)/input/multitap.cpp \ + $(CORE_EMU_DIR)/input/mission.cpp \ + $(CORE_EMU_DIR)/input/keyboard.cpp \ + $(CORE_EMU_DIR)/input/jpkeyboard.cpp \ + $(CORE_EMU_DIR)/input/wheel.cpp + +SOURCES_CXX += \ + $(CORE_EMU_DIR)/cart/backup.cpp \ + $(CORE_EMU_DIR)/cart/cs1ram.cpp \ + $(CORE_EMU_DIR)/cart/debug.cpp \ + $(CORE_EMU_DIR)/cart/extram.cpp \ + $(CORE_EMU_DIR)/cart/rom.cpp \ + $(CORE_EMU_DIR)/cart/ar4mp.cpp + +SOURCES_CXX += $(MEDNAFEN_DIR)/hw_cpu/m68k/m68k.cpp + +ifeq ($(NEED_TREMOR), 1) + SOURCES_C += $(wildcard $(MEDNAFEN_DIR)/tremor/*.c) +endif + +SOURCES_CXX += $(CDROM_DIR)/CDAccess.cpp \ + $(CDROM_DIR)/CDAccess_Image.cpp \ + $(CDROM_DIR)/CDAccess_CCD.cpp \ + $(CDROM_DIR)/CDAFReader.cpp \ + $(CDROM_DIR)/CDAFReader_Vorbis.cpp \ + $(CDROM_DIR)/cdromif.cpp \ + $(CDROM_DIR)/CDUtility.cpp \ + $(CDROM_DIR)/lec.cpp \ + $(CDROM_DIR)/galois.cpp \ + $(CDROM_DIR)/recover-raw.cpp \ + $(CDROM_DIR)/l-ec.cpp \ + $(CDROM_DIR)/edc_crc32.cpp + +ifeq ($(HAVE_CHD), 1) +ifneq ($(SYSTEM_LIBCHDR), 1) +LZMA_DIR := $(DEPS_DIR)/lzma/src +LIBCHDR_DIR := $(DEPS_DIR)/libchdr/src +INCFLAGS += -I$(DEPS_DIR)/lzma/include +SOURCES_C += \ + $(DEPS_DIR)/crypto/md5.c \ + $(DEPS_DIR)/crypto/sha1.c \ + $(LZMA_DIR)/Alloc.c \ + $(LZMA_DIR)/Bra86.c \ + $(LZMA_DIR)/BraIA64.c \ + $(LZMA_DIR)/CpuArch.c \ + $(LZMA_DIR)/Delta.c \ + $(LZMA_DIR)/LzFind.c \ + $(LZMA_DIR)/Lzma86Dec.c \ + $(LZMA_DIR)/LzmaDec.c \ + $(LZMA_DIR)/LzmaEnc.c \ + $(LIBCHDR_DIR)/libchdr_bitstream.c \ + $(LIBCHDR_DIR)/libchdr_cdrom.c \ + $(LIBCHDR_DIR)/libchdr_chd.c \ + $(LIBCHDR_DIR)/libchdr_flac.c \ + $(LIBCHDR_DIR)/libchdr_huffman.c +endif + +SOURCES_CXX += $(CDROM_DIR)/CDAccess_CHD.cpp +endif + +SOURCES_CXX += \ + $(MEDNAFEN_DIR)/error.cpp \ + $(MEDNAFEN_DIR)/settings.cpp \ + $(MEDNAFEN_DIR)/general.cpp \ + $(MEDNAFEN_DIR)/FileStream.cpp \ + $(MEDNAFEN_DIR)/MemoryStream.cpp \ + $(MEDNAFEN_DIR)/Stream.cpp \ + $(MEDNAFEN_DIR)/state.cpp \ + $(MEDNAFEN_DIR)/mempatcher.cpp \ + $(MEDNAFEN_DIR)/video/Deinterlacer.cpp \ + $(MEDNAFEN_DIR)/video/surface.cpp \ + $(CORE_DIR)/disc.cpp \ + $(CORE_DIR)/input.cpp \ + $(CORE_DIR)/libretro.cpp \ + $(CORE_DIR)/libretro_settings.cpp + +SOURCES_C += $(MEDNAFEN_DIR)/mednafen-endian.c + +SOURCES_CXX += \ + $(MEDNAFEN_DIR)/hash/sha256.cpp \ + $(MEDNAFEN_DIR)/hash/md5.cpp + +ifeq ($(HAVE_CHD), 1) +ifneq ($(SYSTEM_LIBCHDR), 1) +ifeq ($(SYSTEM_ZLIB), 1) + INCFLAGS += $(shell pkg-config --cflags zlib) + LIBS += $(shell pkg-config --libs zlib) +else + INCFLAGS += $(ZLIB_INCFLAGS) + SOURCES_C += $(ZLIB_SOURCES_C) +endif +endif +endif + +ifneq ($(STATIC_LINKING), 1) +SOURCES_C += $(LIBRETRO_COMM_DIR)/streams/file_stream.c \ + $(LIBRETRO_COMM_DIR)/streams/file_stream_transforms.c \ + $(LIBRETRO_COMM_DIR)/file/file_path.c \ + $(LIBRETRO_COMM_DIR)/file/retro_dirent.c \ + $(LIBRETRO_COMM_DIR)/lists/dir_list.c \ + $(LIBRETRO_COMM_DIR)/lists/string_list.c \ + $(LIBRETRO_COMM_DIR)/vfs/vfs_implementation.c \ + $(LIBRETRO_COMM_DIR)/compat/compat_posix_string.c \ + $(LIBRETRO_COMM_DIR)/compat/compat_strl.c \ + $(LIBRETRO_COMM_DIR)/compat/compat_strcasestr.c \ + $(LIBRETRO_COMM_DIR)/compat/fopen_utf8.c \ + $(LIBRETRO_COMM_DIR)/encodings/encoding_utf.c \ + $(LIBRETRO_COMM_DIR)/memmap/memalign.c \ + $(LIBRETRO_COMM_DIR)/string/stdstring.c \ + $(LIBRETRO_COMM_DIR)/time/rtime.c + +ifeq ($(NEED_THREADING), 1) +SOURCES_C += $(LIBRETRO_COMM_DIR)/rthreads/rthreads.c \ + $(LIBRETRO_COMM_DIR)/rthreads/rsemaphore.c +endif + +endif diff --git a/cores/saturn/README.md b/cores/saturn/README.md new file mode 100644 index 000000000..19559732c --- /dev/null +++ b/cores/saturn/README.md @@ -0,0 +1,47 @@ +# Beetle Saturn libretro + +This is fork of Mednafen Saturn. It has been ported to the libretro API. +It currently runs on Linux, OSX and possibly Windows. + +## Running + +To run this core, the "system directory" must be defined if running in RetroArch. +The Saturn BIOS must be placed there, $sysdir/{sega_101,mpr-17933}.bin for Japanese, NA and EU regions respectively. + +## Loading ISOs + +Beetle Saturn needs a cue-sheet that points to an image file, usually an .iso/.bin file. +If you have e.g. foo.iso, you should create a foo.cue, and fill this in: + + FILE "foo.iso" BINARY + TRACK 01 MODE1/2352 + INDEX 01 00:00:00 + +After that, you can load the foo.cue file as a ROM. +Note that this is a dirty hack and will not work on all games. +Ideally, make sure to use rips that have cue-sheets. + +If foo is a multiple-disk game, you should have .cue files for each one, e.g. foo (Disc 1).cue, foo (Disc 2).cue, foo (Disc 3).cue.To take advantage of Beetle's Disk Control feature for disk swapping, an index file should be made. + +Open a text file and enter your game's .cue files on it, like this: + + foo (Disc 1).cue + foo (Disc 2).cue + foo (Disc 3).cue + +Save as foo.m3u and use this file in place of each disk's individual cue sheet. + +## Suggested Firmware + +- sega_101.bin (85ec9ca47d8f6807718151cbcca8b964) +- mpr-17933.bin (3240872c70984b6cbfda1586cab68dbe) +- mpr-18811-mx.ic1 (255113ba943c92a54facd25a10fd780c) +- mpr-19367-mx.ic1 (1cd19988d1d72a3e7caa0b73234c96b4) + +## Options + +* CD Image Cache - Loads the complete image in memory at startup +* Initial Scanline - Sets the first scanline to be drawn on screen +* Initial Scanline PAL - Sets the first scanline to be drawn on screen for PAL systems +* Last Scanline - Sets the last scanline to be drawn on screen +* Last Scanline PAL - Sets the last scanline to be drawn on screen for PAL systems diff --git a/cores/saturn/deps/crypto/README.md b/cores/saturn/deps/crypto/README.md new file mode 100644 index 000000000..64eafd26b --- /dev/null +++ b/cores/saturn/deps/crypto/README.md @@ -0,0 +1,17 @@ +crypto-algorithms +================= + + +About +--- +These are basic implementations of standard cryptography algorithms, written by Brad Conte (brad@bradconte.com) from scratch and without any cross-licensing. They exist to provide publically accessible, restriction-free implementations of popular cryptographic algorithms, like AES and SHA-1. These are primarily intended for educational and pragmatic purposes (such as comparing a specification to actual implementation code, or for building an internal application that computes test vectors for a product). The algorithms have been tested against standard test vectors. + +This code is released into the public domain free of any restrictions. The author requests acknowledgement if the code is used, but does not require it. This code is provided free of any liability and without any quality claims by the author. + +Note that these are *not* cryptographically secure implementations. They have no resistence to side-channel attacks and should not be used in contexts that need cryptographically secure implementations. + +These algorithms are not optimized for speed or space. They are primarily designed to be easy to read, although some basic optimization techniques have been employed. + +Building +--- +The source code for each algorithm will come in a pair of a source code file and a header file. There should be no inter-header file dependencies, no additional libraries, no platform-specific header files, or any other complicating matters. Compiling them should be as easy as adding the relevent source code to the project. \ No newline at end of file diff --git a/cores/saturn/deps/crypto/aes.c b/cores/saturn/deps/crypto/aes.c new file mode 100644 index 000000000..c57283970 --- /dev/null +++ b/cores/saturn/deps/crypto/aes.c @@ -0,0 +1,1095 @@ +/********************************************************************* +* Filename: aes.c +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: This code is the implementation of the AES algorithm and + the CTR, CBC, and CCM modes of operation it can be used in. + AES is, specified by the NIST in in publication FIPS PUB 197, + availible at: + * http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf . + The CBC and CTR modes of operation are specified by + NIST SP 800-38 A, available at: + * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf . + The CCM mode of operation is specified by NIST SP80-38 C, available at: + * http://csrc.nist.gov/publications/nistpubs/800-38C/SP800-38C_updated-July20_2007.pdf +*********************************************************************/ + +/*************************** HEADER FILES ***************************/ +#include +#include +#include "aes.h" + +#include + +/****************************** MACROS ******************************/ +// The least significant byte of the word is rotated to the end. +#define KE_ROTWORD(x) (((x) << 8) | ((x) >> 24)) + +#define TRUE 1 +#define FALSE 0 + +/**************************** DATA TYPES ****************************/ +#define AES_128_ROUNDS 10 +#define AES_192_ROUNDS 12 +#define AES_256_ROUNDS 14 + +/*********************** FUNCTION DECLARATIONS **********************/ +void ccm_prepare_first_ctr_blk(BYTE counter[], const BYTE nonce[], int nonce_len, int payload_len_store_size); +void ccm_prepare_first_format_blk(BYTE buf[], int assoc_len, int payload_len, int payload_len_store_size, int mac_len, const BYTE nonce[], int nonce_len); +void ccm_format_assoc_data(BYTE buf[], int *end_of_buf, const BYTE assoc[], int assoc_len); +void ccm_format_payload_data(BYTE buf[], int *end_of_buf, const BYTE payload[], int payload_len); + +/**************************** VARIABLES *****************************/ +// This is the specified AES SBox. To look up a substitution value, put the first +// nibble in the first index (row) and the second nibble in the second index (column). +static const BYTE aes_sbox[16][16] = { + {0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76}, + {0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0}, + {0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15}, + {0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75}, + {0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84}, + {0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF}, + {0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8}, + {0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2}, + {0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73}, + {0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB}, + {0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79}, + {0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08}, + {0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A}, + {0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E}, + {0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF}, + {0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16} +}; + +static const BYTE aes_invsbox[16][16] = { + {0x52,0x09,0x6A,0xD5,0x30,0x36,0xA5,0x38,0xBF,0x40,0xA3,0x9E,0x81,0xF3,0xD7,0xFB}, + {0x7C,0xE3,0x39,0x82,0x9B,0x2F,0xFF,0x87,0x34,0x8E,0x43,0x44,0xC4,0xDE,0xE9,0xCB}, + {0x54,0x7B,0x94,0x32,0xA6,0xC2,0x23,0x3D,0xEE,0x4C,0x95,0x0B,0x42,0xFA,0xC3,0x4E}, + {0x08,0x2E,0xA1,0x66,0x28,0xD9,0x24,0xB2,0x76,0x5B,0xA2,0x49,0x6D,0x8B,0xD1,0x25}, + {0x72,0xF8,0xF6,0x64,0x86,0x68,0x98,0x16,0xD4,0xA4,0x5C,0xCC,0x5D,0x65,0xB6,0x92}, + {0x6C,0x70,0x48,0x50,0xFD,0xED,0xB9,0xDA,0x5E,0x15,0x46,0x57,0xA7,0x8D,0x9D,0x84}, + {0x90,0xD8,0xAB,0x00,0x8C,0xBC,0xD3,0x0A,0xF7,0xE4,0x58,0x05,0xB8,0xB3,0x45,0x06}, + {0xD0,0x2C,0x1E,0x8F,0xCA,0x3F,0x0F,0x02,0xC1,0xAF,0xBD,0x03,0x01,0x13,0x8A,0x6B}, + {0x3A,0x91,0x11,0x41,0x4F,0x67,0xDC,0xEA,0x97,0xF2,0xCF,0xCE,0xF0,0xB4,0xE6,0x73}, + {0x96,0xAC,0x74,0x22,0xE7,0xAD,0x35,0x85,0xE2,0xF9,0x37,0xE8,0x1C,0x75,0xDF,0x6E}, + {0x47,0xF1,0x1A,0x71,0x1D,0x29,0xC5,0x89,0x6F,0xB7,0x62,0x0E,0xAA,0x18,0xBE,0x1B}, + {0xFC,0x56,0x3E,0x4B,0xC6,0xD2,0x79,0x20,0x9A,0xDB,0xC0,0xFE,0x78,0xCD,0x5A,0xF4}, + {0x1F,0xDD,0xA8,0x33,0x88,0x07,0xC7,0x31,0xB1,0x12,0x10,0x59,0x27,0x80,0xEC,0x5F}, + {0x60,0x51,0x7F,0xA9,0x19,0xB5,0x4A,0x0D,0x2D,0xE5,0x7A,0x9F,0x93,0xC9,0x9C,0xEF}, + {0xA0,0xE0,0x3B,0x4D,0xAE,0x2A,0xF5,0xB0,0xC8,0xEB,0xBB,0x3C,0x83,0x53,0x99,0x61}, + {0x17,0x2B,0x04,0x7E,0xBA,0x77,0xD6,0x26,0xE1,0x69,0x14,0x63,0x55,0x21,0x0C,0x7D} +}; + +// This table stores pre-calculated values for all possible GF(2^8) calculations.This +// table is only used by the (Inv)MixColumns steps. +// USAGE: The second index (column) is the coefficient of multiplication. Only 7 different +// coefficients are used: 0x01, 0x02, 0x03, 0x09, 0x0b, 0x0d, 0x0e, but multiplication by +// 1 is negligible leaving only 6 coefficients. Each column of the table is devoted to one +// of these coefficients, in the ascending order of value, from values 0x00 to 0xFF. +static const BYTE gf_mul[256][6] = { + {0x00,0x00,0x00,0x00,0x00,0x00},{0x02,0x03,0x09,0x0b,0x0d,0x0e}, + {0x04,0x06,0x12,0x16,0x1a,0x1c},{0x06,0x05,0x1b,0x1d,0x17,0x12}, + {0x08,0x0c,0x24,0x2c,0x34,0x38},{0x0a,0x0f,0x2d,0x27,0x39,0x36}, + {0x0c,0x0a,0x36,0x3a,0x2e,0x24},{0x0e,0x09,0x3f,0x31,0x23,0x2a}, + {0x10,0x18,0x48,0x58,0x68,0x70},{0x12,0x1b,0x41,0x53,0x65,0x7e}, + {0x14,0x1e,0x5a,0x4e,0x72,0x6c},{0x16,0x1d,0x53,0x45,0x7f,0x62}, + {0x18,0x14,0x6c,0x74,0x5c,0x48},{0x1a,0x17,0x65,0x7f,0x51,0x46}, + {0x1c,0x12,0x7e,0x62,0x46,0x54},{0x1e,0x11,0x77,0x69,0x4b,0x5a}, + {0x20,0x30,0x90,0xb0,0xd0,0xe0},{0x22,0x33,0x99,0xbb,0xdd,0xee}, + {0x24,0x36,0x82,0xa6,0xca,0xfc},{0x26,0x35,0x8b,0xad,0xc7,0xf2}, + {0x28,0x3c,0xb4,0x9c,0xe4,0xd8},{0x2a,0x3f,0xbd,0x97,0xe9,0xd6}, + {0x2c,0x3a,0xa6,0x8a,0xfe,0xc4},{0x2e,0x39,0xaf,0x81,0xf3,0xca}, + {0x30,0x28,0xd8,0xe8,0xb8,0x90},{0x32,0x2b,0xd1,0xe3,0xb5,0x9e}, + {0x34,0x2e,0xca,0xfe,0xa2,0x8c},{0x36,0x2d,0xc3,0xf5,0xaf,0x82}, + {0x38,0x24,0xfc,0xc4,0x8c,0xa8},{0x3a,0x27,0xf5,0xcf,0x81,0xa6}, + {0x3c,0x22,0xee,0xd2,0x96,0xb4},{0x3e,0x21,0xe7,0xd9,0x9b,0xba}, + {0x40,0x60,0x3b,0x7b,0xbb,0xdb},{0x42,0x63,0x32,0x70,0xb6,0xd5}, + {0x44,0x66,0x29,0x6d,0xa1,0xc7},{0x46,0x65,0x20,0x66,0xac,0xc9}, + {0x48,0x6c,0x1f,0x57,0x8f,0xe3},{0x4a,0x6f,0x16,0x5c,0x82,0xed}, + {0x4c,0x6a,0x0d,0x41,0x95,0xff},{0x4e,0x69,0x04,0x4a,0x98,0xf1}, + {0x50,0x78,0x73,0x23,0xd3,0xab},{0x52,0x7b,0x7a,0x28,0xde,0xa5}, + {0x54,0x7e,0x61,0x35,0xc9,0xb7},{0x56,0x7d,0x68,0x3e,0xc4,0xb9}, + {0x58,0x74,0x57,0x0f,0xe7,0x93},{0x5a,0x77,0x5e,0x04,0xea,0x9d}, + {0x5c,0x72,0x45,0x19,0xfd,0x8f},{0x5e,0x71,0x4c,0x12,0xf0,0x81}, + {0x60,0x50,0xab,0xcb,0x6b,0x3b},{0x62,0x53,0xa2,0xc0,0x66,0x35}, + {0x64,0x56,0xb9,0xdd,0x71,0x27},{0x66,0x55,0xb0,0xd6,0x7c,0x29}, + {0x68,0x5c,0x8f,0xe7,0x5f,0x03},{0x6a,0x5f,0x86,0xec,0x52,0x0d}, + {0x6c,0x5a,0x9d,0xf1,0x45,0x1f},{0x6e,0x59,0x94,0xfa,0x48,0x11}, + {0x70,0x48,0xe3,0x93,0x03,0x4b},{0x72,0x4b,0xea,0x98,0x0e,0x45}, + {0x74,0x4e,0xf1,0x85,0x19,0x57},{0x76,0x4d,0xf8,0x8e,0x14,0x59}, + {0x78,0x44,0xc7,0xbf,0x37,0x73},{0x7a,0x47,0xce,0xb4,0x3a,0x7d}, + {0x7c,0x42,0xd5,0xa9,0x2d,0x6f},{0x7e,0x41,0xdc,0xa2,0x20,0x61}, + {0x80,0xc0,0x76,0xf6,0x6d,0xad},{0x82,0xc3,0x7f,0xfd,0x60,0xa3}, + {0x84,0xc6,0x64,0xe0,0x77,0xb1},{0x86,0xc5,0x6d,0xeb,0x7a,0xbf}, + {0x88,0xcc,0x52,0xda,0x59,0x95},{0x8a,0xcf,0x5b,0xd1,0x54,0x9b}, + {0x8c,0xca,0x40,0xcc,0x43,0x89},{0x8e,0xc9,0x49,0xc7,0x4e,0x87}, + {0x90,0xd8,0x3e,0xae,0x05,0xdd},{0x92,0xdb,0x37,0xa5,0x08,0xd3}, + {0x94,0xde,0x2c,0xb8,0x1f,0xc1},{0x96,0xdd,0x25,0xb3,0x12,0xcf}, + {0x98,0xd4,0x1a,0x82,0x31,0xe5},{0x9a,0xd7,0x13,0x89,0x3c,0xeb}, + {0x9c,0xd2,0x08,0x94,0x2b,0xf9},{0x9e,0xd1,0x01,0x9f,0x26,0xf7}, + {0xa0,0xf0,0xe6,0x46,0xbd,0x4d},{0xa2,0xf3,0xef,0x4d,0xb0,0x43}, + {0xa4,0xf6,0xf4,0x50,0xa7,0x51},{0xa6,0xf5,0xfd,0x5b,0xaa,0x5f}, + {0xa8,0xfc,0xc2,0x6a,0x89,0x75},{0xaa,0xff,0xcb,0x61,0x84,0x7b}, + {0xac,0xfa,0xd0,0x7c,0x93,0x69},{0xae,0xf9,0xd9,0x77,0x9e,0x67}, + {0xb0,0xe8,0xae,0x1e,0xd5,0x3d},{0xb2,0xeb,0xa7,0x15,0xd8,0x33}, + {0xb4,0xee,0xbc,0x08,0xcf,0x21},{0xb6,0xed,0xb5,0x03,0xc2,0x2f}, + {0xb8,0xe4,0x8a,0x32,0xe1,0x05},{0xba,0xe7,0x83,0x39,0xec,0x0b}, + {0xbc,0xe2,0x98,0x24,0xfb,0x19},{0xbe,0xe1,0x91,0x2f,0xf6,0x17}, + {0xc0,0xa0,0x4d,0x8d,0xd6,0x76},{0xc2,0xa3,0x44,0x86,0xdb,0x78}, + {0xc4,0xa6,0x5f,0x9b,0xcc,0x6a},{0xc6,0xa5,0x56,0x90,0xc1,0x64}, + {0xc8,0xac,0x69,0xa1,0xe2,0x4e},{0xca,0xaf,0x60,0xaa,0xef,0x40}, + {0xcc,0xaa,0x7b,0xb7,0xf8,0x52},{0xce,0xa9,0x72,0xbc,0xf5,0x5c}, + {0xd0,0xb8,0x05,0xd5,0xbe,0x06},{0xd2,0xbb,0x0c,0xde,0xb3,0x08}, + {0xd4,0xbe,0x17,0xc3,0xa4,0x1a},{0xd6,0xbd,0x1e,0xc8,0xa9,0x14}, + {0xd8,0xb4,0x21,0xf9,0x8a,0x3e},{0xda,0xb7,0x28,0xf2,0x87,0x30}, + {0xdc,0xb2,0x33,0xef,0x90,0x22},{0xde,0xb1,0x3a,0xe4,0x9d,0x2c}, + {0xe0,0x90,0xdd,0x3d,0x06,0x96},{0xe2,0x93,0xd4,0x36,0x0b,0x98}, + {0xe4,0x96,0xcf,0x2b,0x1c,0x8a},{0xe6,0x95,0xc6,0x20,0x11,0x84}, + {0xe8,0x9c,0xf9,0x11,0x32,0xae},{0xea,0x9f,0xf0,0x1a,0x3f,0xa0}, + {0xec,0x9a,0xeb,0x07,0x28,0xb2},{0xee,0x99,0xe2,0x0c,0x25,0xbc}, + {0xf0,0x88,0x95,0x65,0x6e,0xe6},{0xf2,0x8b,0x9c,0x6e,0x63,0xe8}, + {0xf4,0x8e,0x87,0x73,0x74,0xfa},{0xf6,0x8d,0x8e,0x78,0x79,0xf4}, + {0xf8,0x84,0xb1,0x49,0x5a,0xde},{0xfa,0x87,0xb8,0x42,0x57,0xd0}, + {0xfc,0x82,0xa3,0x5f,0x40,0xc2},{0xfe,0x81,0xaa,0x54,0x4d,0xcc}, + {0x1b,0x9b,0xec,0xf7,0xda,0x41},{0x19,0x98,0xe5,0xfc,0xd7,0x4f}, + {0x1f,0x9d,0xfe,0xe1,0xc0,0x5d},{0x1d,0x9e,0xf7,0xea,0xcd,0x53}, + {0x13,0x97,0xc8,0xdb,0xee,0x79},{0x11,0x94,0xc1,0xd0,0xe3,0x77}, + {0x17,0x91,0xda,0xcd,0xf4,0x65},{0x15,0x92,0xd3,0xc6,0xf9,0x6b}, + {0x0b,0x83,0xa4,0xaf,0xb2,0x31},{0x09,0x80,0xad,0xa4,0xbf,0x3f}, + {0x0f,0x85,0xb6,0xb9,0xa8,0x2d},{0x0d,0x86,0xbf,0xb2,0xa5,0x23}, + {0x03,0x8f,0x80,0x83,0x86,0x09},{0x01,0x8c,0x89,0x88,0x8b,0x07}, + {0x07,0x89,0x92,0x95,0x9c,0x15},{0x05,0x8a,0x9b,0x9e,0x91,0x1b}, + {0x3b,0xab,0x7c,0x47,0x0a,0xa1},{0x39,0xa8,0x75,0x4c,0x07,0xaf}, + {0x3f,0xad,0x6e,0x51,0x10,0xbd},{0x3d,0xae,0x67,0x5a,0x1d,0xb3}, + {0x33,0xa7,0x58,0x6b,0x3e,0x99},{0x31,0xa4,0x51,0x60,0x33,0x97}, + {0x37,0xa1,0x4a,0x7d,0x24,0x85},{0x35,0xa2,0x43,0x76,0x29,0x8b}, + {0x2b,0xb3,0x34,0x1f,0x62,0xd1},{0x29,0xb0,0x3d,0x14,0x6f,0xdf}, + {0x2f,0xb5,0x26,0x09,0x78,0xcd},{0x2d,0xb6,0x2f,0x02,0x75,0xc3}, + {0x23,0xbf,0x10,0x33,0x56,0xe9},{0x21,0xbc,0x19,0x38,0x5b,0xe7}, + {0x27,0xb9,0x02,0x25,0x4c,0xf5},{0x25,0xba,0x0b,0x2e,0x41,0xfb}, + {0x5b,0xfb,0xd7,0x8c,0x61,0x9a},{0x59,0xf8,0xde,0x87,0x6c,0x94}, + {0x5f,0xfd,0xc5,0x9a,0x7b,0x86},{0x5d,0xfe,0xcc,0x91,0x76,0x88}, + {0x53,0xf7,0xf3,0xa0,0x55,0xa2},{0x51,0xf4,0xfa,0xab,0x58,0xac}, + {0x57,0xf1,0xe1,0xb6,0x4f,0xbe},{0x55,0xf2,0xe8,0xbd,0x42,0xb0}, + {0x4b,0xe3,0x9f,0xd4,0x09,0xea},{0x49,0xe0,0x96,0xdf,0x04,0xe4}, + {0x4f,0xe5,0x8d,0xc2,0x13,0xf6},{0x4d,0xe6,0x84,0xc9,0x1e,0xf8}, + {0x43,0xef,0xbb,0xf8,0x3d,0xd2},{0x41,0xec,0xb2,0xf3,0x30,0xdc}, + {0x47,0xe9,0xa9,0xee,0x27,0xce},{0x45,0xea,0xa0,0xe5,0x2a,0xc0}, + {0x7b,0xcb,0x47,0x3c,0xb1,0x7a},{0x79,0xc8,0x4e,0x37,0xbc,0x74}, + {0x7f,0xcd,0x55,0x2a,0xab,0x66},{0x7d,0xce,0x5c,0x21,0xa6,0x68}, + {0x73,0xc7,0x63,0x10,0x85,0x42},{0x71,0xc4,0x6a,0x1b,0x88,0x4c}, + {0x77,0xc1,0x71,0x06,0x9f,0x5e},{0x75,0xc2,0x78,0x0d,0x92,0x50}, + {0x6b,0xd3,0x0f,0x64,0xd9,0x0a},{0x69,0xd0,0x06,0x6f,0xd4,0x04}, + {0x6f,0xd5,0x1d,0x72,0xc3,0x16},{0x6d,0xd6,0x14,0x79,0xce,0x18}, + {0x63,0xdf,0x2b,0x48,0xed,0x32},{0x61,0xdc,0x22,0x43,0xe0,0x3c}, + {0x67,0xd9,0x39,0x5e,0xf7,0x2e},{0x65,0xda,0x30,0x55,0xfa,0x20}, + {0x9b,0x5b,0x9a,0x01,0xb7,0xec},{0x99,0x58,0x93,0x0a,0xba,0xe2}, + {0x9f,0x5d,0x88,0x17,0xad,0xf0},{0x9d,0x5e,0x81,0x1c,0xa0,0xfe}, + {0x93,0x57,0xbe,0x2d,0x83,0xd4},{0x91,0x54,0xb7,0x26,0x8e,0xda}, + {0x97,0x51,0xac,0x3b,0x99,0xc8},{0x95,0x52,0xa5,0x30,0x94,0xc6}, + {0x8b,0x43,0xd2,0x59,0xdf,0x9c},{0x89,0x40,0xdb,0x52,0xd2,0x92}, + {0x8f,0x45,0xc0,0x4f,0xc5,0x80},{0x8d,0x46,0xc9,0x44,0xc8,0x8e}, + {0x83,0x4f,0xf6,0x75,0xeb,0xa4},{0x81,0x4c,0xff,0x7e,0xe6,0xaa}, + {0x87,0x49,0xe4,0x63,0xf1,0xb8},{0x85,0x4a,0xed,0x68,0xfc,0xb6}, + {0xbb,0x6b,0x0a,0xb1,0x67,0x0c},{0xb9,0x68,0x03,0xba,0x6a,0x02}, + {0xbf,0x6d,0x18,0xa7,0x7d,0x10},{0xbd,0x6e,0x11,0xac,0x70,0x1e}, + {0xb3,0x67,0x2e,0x9d,0x53,0x34},{0xb1,0x64,0x27,0x96,0x5e,0x3a}, + {0xb7,0x61,0x3c,0x8b,0x49,0x28},{0xb5,0x62,0x35,0x80,0x44,0x26}, + {0xab,0x73,0x42,0xe9,0x0f,0x7c},{0xa9,0x70,0x4b,0xe2,0x02,0x72}, + {0xaf,0x75,0x50,0xff,0x15,0x60},{0xad,0x76,0x59,0xf4,0x18,0x6e}, + {0xa3,0x7f,0x66,0xc5,0x3b,0x44},{0xa1,0x7c,0x6f,0xce,0x36,0x4a}, + {0xa7,0x79,0x74,0xd3,0x21,0x58},{0xa5,0x7a,0x7d,0xd8,0x2c,0x56}, + {0xdb,0x3b,0xa1,0x7a,0x0c,0x37},{0xd9,0x38,0xa8,0x71,0x01,0x39}, + {0xdf,0x3d,0xb3,0x6c,0x16,0x2b},{0xdd,0x3e,0xba,0x67,0x1b,0x25}, + {0xd3,0x37,0x85,0x56,0x38,0x0f},{0xd1,0x34,0x8c,0x5d,0x35,0x01}, + {0xd7,0x31,0x97,0x40,0x22,0x13},{0xd5,0x32,0x9e,0x4b,0x2f,0x1d}, + {0xcb,0x23,0xe9,0x22,0x64,0x47},{0xc9,0x20,0xe0,0x29,0x69,0x49}, + {0xcf,0x25,0xfb,0x34,0x7e,0x5b},{0xcd,0x26,0xf2,0x3f,0x73,0x55}, + {0xc3,0x2f,0xcd,0x0e,0x50,0x7f},{0xc1,0x2c,0xc4,0x05,0x5d,0x71}, + {0xc7,0x29,0xdf,0x18,0x4a,0x63},{0xc5,0x2a,0xd6,0x13,0x47,0x6d}, + {0xfb,0x0b,0x31,0xca,0xdc,0xd7},{0xf9,0x08,0x38,0xc1,0xd1,0xd9}, + {0xff,0x0d,0x23,0xdc,0xc6,0xcb},{0xfd,0x0e,0x2a,0xd7,0xcb,0xc5}, + {0xf3,0x07,0x15,0xe6,0xe8,0xef},{0xf1,0x04,0x1c,0xed,0xe5,0xe1}, + {0xf7,0x01,0x07,0xf0,0xf2,0xf3},{0xf5,0x02,0x0e,0xfb,0xff,0xfd}, + {0xeb,0x13,0x79,0x92,0xb4,0xa7},{0xe9,0x10,0x70,0x99,0xb9,0xa9}, + {0xef,0x15,0x6b,0x84,0xae,0xbb},{0xed,0x16,0x62,0x8f,0xa3,0xb5}, + {0xe3,0x1f,0x5d,0xbe,0x80,0x9f},{0xe1,0x1c,0x54,0xb5,0x8d,0x91}, + {0xe7,0x19,0x4f,0xa8,0x9a,0x83},{0xe5,0x1a,0x46,0xa3,0x97,0x8d} +}; + +/*********************** FUNCTION DEFINITIONS ***********************/ +// XORs the in and out buffers, storing the result in out. Length is in bytes. +void xor_buf(const BYTE in[], BYTE out[], size_t len) +{ + size_t idx; + + for (idx = 0; idx < len; idx++) + out[idx] ^= in[idx]; +} + +/******************* +* AES - CBC +*******************/ +int aes_encrypt_cbc(const BYTE in[], size_t in_len, BYTE out[], const WORD key[], int keysize, const BYTE iv[]) +{ + BYTE buf_in[AES_BLOCK_SIZE], buf_out[AES_BLOCK_SIZE], iv_buf[AES_BLOCK_SIZE]; + int blocks, idx; + + if (in_len % AES_BLOCK_SIZE != 0) + return(FALSE); + + blocks = in_len / AES_BLOCK_SIZE; + + memcpy(iv_buf, iv, AES_BLOCK_SIZE); + + for (idx = 0; idx < blocks; idx++) { + memcpy(buf_in, &in[idx * AES_BLOCK_SIZE], AES_BLOCK_SIZE); + xor_buf(iv_buf, buf_in, AES_BLOCK_SIZE); + aes_encrypt(buf_in, buf_out, key, keysize); + memcpy(&out[idx * AES_BLOCK_SIZE], buf_out, AES_BLOCK_SIZE); + memcpy(iv_buf, buf_out, AES_BLOCK_SIZE); + } + + return(TRUE); +} + +int aes_encrypt_cbc_mac(const BYTE in[], size_t in_len, BYTE out[], const WORD key[], int keysize, const BYTE iv[]) +{ + BYTE buf_in[AES_BLOCK_SIZE], buf_out[AES_BLOCK_SIZE], iv_buf[AES_BLOCK_SIZE]; + int blocks, idx; + + if (in_len % AES_BLOCK_SIZE != 0) + return(FALSE); + + blocks = in_len / AES_BLOCK_SIZE; + + memcpy(iv_buf, iv, AES_BLOCK_SIZE); + + for (idx = 0; idx < blocks; idx++) { + memcpy(buf_in, &in[idx * AES_BLOCK_SIZE], AES_BLOCK_SIZE); + xor_buf(iv_buf, buf_in, AES_BLOCK_SIZE); + aes_encrypt(buf_in, buf_out, key, keysize); + memcpy(iv_buf, buf_out, AES_BLOCK_SIZE); + // Do not output all encrypted blocks. + } + + memcpy(out, buf_out, AES_BLOCK_SIZE); // Only output the last block. + + return(TRUE); +} + +int aes_decrypt_cbc(const BYTE in[], size_t in_len, BYTE out[], const WORD key[], int keysize, const BYTE iv[]) +{ + BYTE buf_in[AES_BLOCK_SIZE], buf_out[AES_BLOCK_SIZE], iv_buf[AES_BLOCK_SIZE]; + int blocks, idx; + + if (in_len % AES_BLOCK_SIZE != 0) + return(FALSE); + + blocks = in_len / AES_BLOCK_SIZE; + + memcpy(iv_buf, iv, AES_BLOCK_SIZE); + + for (idx = 0; idx < blocks; idx++) { + memcpy(buf_in, &in[idx * AES_BLOCK_SIZE], AES_BLOCK_SIZE); + aes_decrypt(buf_in, buf_out, key, keysize); + xor_buf(iv_buf, buf_out, AES_BLOCK_SIZE); + memcpy(&out[idx * AES_BLOCK_SIZE], buf_out, AES_BLOCK_SIZE); + memcpy(iv_buf, buf_in, AES_BLOCK_SIZE); + } + + return(TRUE); +} + +/******************* +* AES - CTR +*******************/ +void increment_iv(BYTE iv[], int counter_size) +{ + int idx; + + // Use counter_size bytes at the end of the IV as the big-endian integer to increment. + for (idx = AES_BLOCK_SIZE - 1; idx >= AES_BLOCK_SIZE - counter_size; idx--) { + iv[idx]++; + if (iv[idx] != 0 || idx == AES_BLOCK_SIZE - counter_size) + break; + } +} + +// Performs the encryption in-place, the input and output buffers may be the same. +// Input may be an arbitrary length (in bytes). +void aes_encrypt_ctr(const BYTE in[], size_t in_len, BYTE out[], const WORD key[], int keysize, const BYTE iv[]) +{ + size_t idx = 0, last_block_length; + BYTE iv_buf[AES_BLOCK_SIZE], out_buf[AES_BLOCK_SIZE]; + + if (in != out) + memcpy(out, in, in_len); + + memcpy(iv_buf, iv, AES_BLOCK_SIZE); + last_block_length = in_len - AES_BLOCK_SIZE; + + if (in_len > AES_BLOCK_SIZE) { + for (idx = 0; idx < last_block_length; idx += AES_BLOCK_SIZE) { + aes_encrypt(iv_buf, out_buf, key, keysize); + xor_buf(out_buf, &out[idx], AES_BLOCK_SIZE); + increment_iv(iv_buf, AES_BLOCK_SIZE); + } + } + + aes_encrypt(iv_buf, out_buf, key, keysize); + xor_buf(out_buf, &out[idx], in_len - idx); // Use the Most Significant bytes. +} + +void aes_decrypt_ctr(const BYTE in[], size_t in_len, BYTE out[], const WORD key[], int keysize, const BYTE iv[]) +{ + // CTR encryption is its own inverse function. + aes_encrypt_ctr(in, in_len, out, key, keysize, iv); +} + +/******************* +* AES - CCM +*******************/ +// out_len = payload_len + assoc_len +int aes_encrypt_ccm(const BYTE payload[], WORD payload_len, const BYTE assoc[], unsigned short assoc_len, + const BYTE nonce[], unsigned short nonce_len, BYTE out[], WORD *out_len, + WORD mac_len, const BYTE key_str[], int keysize) +{ + BYTE temp_iv[AES_BLOCK_SIZE], counter[AES_BLOCK_SIZE], mac[16], *buf; + int end_of_buf, payload_len_store_size; + WORD key[60]; + + if (mac_len != 4 && mac_len != 6 && mac_len != 8 && mac_len != 10 && + mac_len != 12 && mac_len != 14 && mac_len != 16) + return(FALSE); + + if (nonce_len < 7 || nonce_len > 13) + return(FALSE); + + if (assoc_len > 32768 /* = 2^15 */) + return(FALSE); + + buf = (BYTE*)malloc(payload_len + assoc_len + 48 /*Round both payload and associated data up a block size and add an extra block.*/); + if (! buf) + return(FALSE); + + // Prepare the key for usage. + aes_key_setup(key_str, key, keysize); + + // Format the first block of the formatted data. + payload_len_store_size = AES_BLOCK_SIZE - 1 - nonce_len; + ccm_prepare_first_format_blk(buf, assoc_len, payload_len, payload_len_store_size, mac_len, nonce, nonce_len); + end_of_buf = AES_BLOCK_SIZE; + + // Format the Associated Data, aka, assoc[]. + ccm_format_assoc_data(buf, &end_of_buf, assoc, assoc_len); + + // Format the Payload, aka payload[]. + ccm_format_payload_data(buf, &end_of_buf, payload, payload_len); + + // Create the first counter block. + ccm_prepare_first_ctr_blk(counter, nonce, nonce_len, payload_len_store_size); + + // Perform the CBC operation with an IV of zeros on the formatted buffer to calculate the MAC. + memset(temp_iv, 0, AES_BLOCK_SIZE); + aes_encrypt_cbc_mac(buf, end_of_buf, mac, key, keysize, temp_iv); + + // Copy the Payload and MAC to the output buffer. + memcpy(out, payload, payload_len); + memcpy(&out[payload_len], mac, mac_len); + + // Encrypt the Payload with CTR mode with a counter starting at 1. + memcpy(temp_iv, counter, AES_BLOCK_SIZE); + increment_iv(temp_iv, AES_BLOCK_SIZE - 1 - mac_len); // Last argument is the byte size of the counting portion of the counter block. /*BUG?*/ + aes_encrypt_ctr(out, payload_len, out, key, keysize, temp_iv); + + // Encrypt the MAC with CTR mode with a counter starting at 0. + aes_encrypt_ctr(&out[payload_len], mac_len, &out[payload_len], key, keysize, counter); + + free(buf); + *out_len = payload_len + mac_len; + + return(TRUE); +} + +// plaintext_len = ciphertext_len - mac_len +// Needs a flag for whether the MAC matches. +int aes_decrypt_ccm(const BYTE ciphertext[], WORD ciphertext_len, const BYTE assoc[], unsigned short assoc_len, + const BYTE nonce[], unsigned short nonce_len, BYTE plaintext[], WORD *plaintext_len, + WORD mac_len, int *mac_auth, const BYTE key_str[], int keysize) +{ + BYTE temp_iv[AES_BLOCK_SIZE], counter[AES_BLOCK_SIZE], mac[16], mac_buf[16], *buf; + int end_of_buf, plaintext_len_store_size; + WORD key[60]; + + if (ciphertext_len <= mac_len) + return(FALSE); + + buf = (BYTE*)malloc(assoc_len + ciphertext_len /*ciphertext_len = plaintext_len + mac_len*/ + 48); + if (! buf) + return(FALSE); + + // Prepare the key for usage. + aes_key_setup(key_str, key, keysize); + + // Copy the plaintext and MAC to the output buffers. + *plaintext_len = ciphertext_len - mac_len; + plaintext_len_store_size = AES_BLOCK_SIZE - 1 - nonce_len; + memcpy(plaintext, ciphertext, *plaintext_len); + memcpy(mac, &ciphertext[*plaintext_len], mac_len); + + // Prepare the first counter block for use in decryption. + ccm_prepare_first_ctr_blk(counter, nonce, nonce_len, plaintext_len_store_size); + + // Decrypt the Payload with CTR mode with a counter starting at 1. + memcpy(temp_iv, counter, AES_BLOCK_SIZE); + increment_iv(temp_iv, AES_BLOCK_SIZE - 1 - mac_len); // (AES_BLOCK_SIZE - 1 - mac_len) is the byte size of the counting portion of the counter block. + aes_decrypt_ctr(plaintext, *plaintext_len, plaintext, key, keysize, temp_iv); + + // Setting mac_auth to NULL disables the authentication check. + if (mac_auth != NULL) { + // Decrypt the MAC with CTR mode with a counter starting at 0. + aes_decrypt_ctr(mac, mac_len, mac, key, keysize, counter); + + // Format the first block of the formatted data. + plaintext_len_store_size = AES_BLOCK_SIZE - 1 - nonce_len; + ccm_prepare_first_format_blk(buf, assoc_len, *plaintext_len, plaintext_len_store_size, mac_len, nonce, nonce_len); + end_of_buf = AES_BLOCK_SIZE; + + // Format the Associated Data into the authentication buffer. + ccm_format_assoc_data(buf, &end_of_buf, assoc, assoc_len); + + // Format the Payload into the authentication buffer. + ccm_format_payload_data(buf, &end_of_buf, plaintext, *plaintext_len); + + // Perform the CBC operation with an IV of zeros on the formatted buffer to calculate the MAC. + memset(temp_iv, 0, AES_BLOCK_SIZE); + aes_encrypt_cbc_mac(buf, end_of_buf, mac_buf, key, keysize, temp_iv); + + // Compare the calculated MAC against the MAC embedded in the ciphertext to see if they are the same. + if (! memcmp(mac, mac_buf, mac_len)) { + *mac_auth = TRUE; + } + else { + *mac_auth = FALSE; + memset(plaintext, 0, *plaintext_len); + } + } + + free(buf); + + return(TRUE); +} + +// Creates the first counter block. First byte is flags, then the nonce, then the incremented part. +void ccm_prepare_first_ctr_blk(BYTE counter[], const BYTE nonce[], int nonce_len, int payload_len_store_size) +{ + memset(counter, 0, AES_BLOCK_SIZE); + counter[0] = (payload_len_store_size - 1) & 0x07; + memcpy(&counter[1], nonce, nonce_len); +} + +void ccm_prepare_first_format_blk(BYTE buf[], int assoc_len, int payload_len, int payload_len_store_size, int mac_len, const BYTE nonce[], int nonce_len) +{ + // Set the flags for the first byte of the first block. + buf[0] = ((((mac_len - 2) / 2) & 0x07) << 3) | ((payload_len_store_size - 1) & 0x07); + if (assoc_len > 0) + buf[0] += 0x40; + // Format the rest of the first block, storing the nonce and the size of the payload. + memcpy(&buf[1], nonce, nonce_len); + memset(&buf[1 + nonce_len], 0, AES_BLOCK_SIZE - 1 - nonce_len); + buf[15] = payload_len & 0x000000FF; + buf[14] = (payload_len >> 8) & 0x000000FF; +} + +void ccm_format_assoc_data(BYTE buf[], int *end_of_buf, const BYTE assoc[], int assoc_len) +{ + int pad; + + buf[*end_of_buf + 1] = assoc_len & 0x00FF; + buf[*end_of_buf] = (assoc_len >> 8) & 0x00FF; + *end_of_buf += 2; + memcpy(&buf[*end_of_buf], assoc, assoc_len); + *end_of_buf += assoc_len; + pad = AES_BLOCK_SIZE - (*end_of_buf % AES_BLOCK_SIZE); /*BUG?*/ + memset(&buf[*end_of_buf], 0, pad); + *end_of_buf += pad; +} + +void ccm_format_payload_data(BYTE buf[], int *end_of_buf, const BYTE payload[], int payload_len) +{ + int pad; + + memcpy(&buf[*end_of_buf], payload, payload_len); + *end_of_buf += payload_len; + pad = *end_of_buf % AES_BLOCK_SIZE; + if (pad != 0) + pad = AES_BLOCK_SIZE - pad; + memset(&buf[*end_of_buf], 0, pad); + *end_of_buf += pad; +} + +/******************* +* AES +*******************/ +///////////////// +// KEY EXPANSION +///////////////// + +// Substitutes a word using the AES S-Box. +WORD SubWord(WORD word) +{ + unsigned int result; + + result = (int)aes_sbox[(word >> 4) & 0x0000000F][word & 0x0000000F]; + result += (int)aes_sbox[(word >> 12) & 0x0000000F][(word >> 8) & 0x0000000F] << 8; + result += (int)aes_sbox[(word >> 20) & 0x0000000F][(word >> 16) & 0x0000000F] << 16; + result += (int)aes_sbox[(word >> 28) & 0x0000000F][(word >> 24) & 0x0000000F] << 24; + return(result); +} + +// Performs the action of generating the keys that will be used in every round of +// encryption. "key" is the user-supplied input key, "w" is the output key schedule, +// "keysize" is the length in bits of "key", must be 128, 192, or 256. +void aes_key_setup(const BYTE key[], WORD w[], int keysize) +{ + int Nb=4,Nr,Nk,idx; + WORD temp,Rcon[]={0x01000000,0x02000000,0x04000000,0x08000000,0x10000000,0x20000000, + 0x40000000,0x80000000,0x1b000000,0x36000000,0x6c000000,0xd8000000, + 0xab000000,0x4d000000,0x9a000000}; + + switch (keysize) { + case 128: Nr = 10; Nk = 4; break; + case 192: Nr = 12; Nk = 6; break; + case 256: Nr = 14; Nk = 8; break; + default: return; + } + + for (idx=0; idx < Nk; ++idx) { + w[idx] = ((key[4 * idx]) << 24) | ((key[4 * idx + 1]) << 16) | + ((key[4 * idx + 2]) << 8) | ((key[4 * idx + 3])); + } + + for (idx = Nk; idx < Nb * (Nr+1); ++idx) { + temp = w[idx - 1]; + if ((idx % Nk) == 0) + temp = SubWord(KE_ROTWORD(temp)) ^ Rcon[(idx-1)/Nk]; + else if (Nk > 6 && (idx % Nk) == 4) + temp = SubWord(temp); + w[idx] = w[idx-Nk] ^ temp; + } +} + +///////////////// +// ADD ROUND KEY +///////////////// + +// Performs the AddRoundKey step. Each round has its own pre-generated 16-byte key in the +// form of 4 integers (the "w" array). Each integer is XOR'd by one column of the state. +// Also performs the job of InvAddRoundKey(); since the function is a simple XOR process, +// it is its own inverse. +void AddRoundKey(BYTE state[][4], const WORD w[]) +{ + BYTE subkey[4]; + + // memcpy(subkey,&w[idx],4); // Not accurate for big endian machines + // Subkey 1 + subkey[0] = w[0] >> 24; + subkey[1] = w[0] >> 16; + subkey[2] = w[0] >> 8; + subkey[3] = w[0]; + state[0][0] ^= subkey[0]; + state[1][0] ^= subkey[1]; + state[2][0] ^= subkey[2]; + state[3][0] ^= subkey[3]; + // Subkey 2 + subkey[0] = w[1] >> 24; + subkey[1] = w[1] >> 16; + subkey[2] = w[1] >> 8; + subkey[3] = w[1]; + state[0][1] ^= subkey[0]; + state[1][1] ^= subkey[1]; + state[2][1] ^= subkey[2]; + state[3][1] ^= subkey[3]; + // Subkey 3 + subkey[0] = w[2] >> 24; + subkey[1] = w[2] >> 16; + subkey[2] = w[2] >> 8; + subkey[3] = w[2]; + state[0][2] ^= subkey[0]; + state[1][2] ^= subkey[1]; + state[2][2] ^= subkey[2]; + state[3][2] ^= subkey[3]; + // Subkey 4 + subkey[0] = w[3] >> 24; + subkey[1] = w[3] >> 16; + subkey[2] = w[3] >> 8; + subkey[3] = w[3]; + state[0][3] ^= subkey[0]; + state[1][3] ^= subkey[1]; + state[2][3] ^= subkey[2]; + state[3][3] ^= subkey[3]; +} + +///////////////// +// (Inv)SubBytes +///////////////// + +// Performs the SubBytes step. All bytes in the state are substituted with a +// pre-calculated value from a lookup table. +void SubBytes(BYTE state[][4]) +{ + state[0][0] = aes_sbox[state[0][0] >> 4][state[0][0] & 0x0F]; + state[0][1] = aes_sbox[state[0][1] >> 4][state[0][1] & 0x0F]; + state[0][2] = aes_sbox[state[0][2] >> 4][state[0][2] & 0x0F]; + state[0][3] = aes_sbox[state[0][3] >> 4][state[0][3] & 0x0F]; + state[1][0] = aes_sbox[state[1][0] >> 4][state[1][0] & 0x0F]; + state[1][1] = aes_sbox[state[1][1] >> 4][state[1][1] & 0x0F]; + state[1][2] = aes_sbox[state[1][2] >> 4][state[1][2] & 0x0F]; + state[1][3] = aes_sbox[state[1][3] >> 4][state[1][3] & 0x0F]; + state[2][0] = aes_sbox[state[2][0] >> 4][state[2][0] & 0x0F]; + state[2][1] = aes_sbox[state[2][1] >> 4][state[2][1] & 0x0F]; + state[2][2] = aes_sbox[state[2][2] >> 4][state[2][2] & 0x0F]; + state[2][3] = aes_sbox[state[2][3] >> 4][state[2][3] & 0x0F]; + state[3][0] = aes_sbox[state[3][0] >> 4][state[3][0] & 0x0F]; + state[3][1] = aes_sbox[state[3][1] >> 4][state[3][1] & 0x0F]; + state[3][2] = aes_sbox[state[3][2] >> 4][state[3][2] & 0x0F]; + state[3][3] = aes_sbox[state[3][3] >> 4][state[3][3] & 0x0F]; +} + +void InvSubBytes(BYTE state[][4]) +{ + state[0][0] = aes_invsbox[state[0][0] >> 4][state[0][0] & 0x0F]; + state[0][1] = aes_invsbox[state[0][1] >> 4][state[0][1] & 0x0F]; + state[0][2] = aes_invsbox[state[0][2] >> 4][state[0][2] & 0x0F]; + state[0][3] = aes_invsbox[state[0][3] >> 4][state[0][3] & 0x0F]; + state[1][0] = aes_invsbox[state[1][0] >> 4][state[1][0] & 0x0F]; + state[1][1] = aes_invsbox[state[1][1] >> 4][state[1][1] & 0x0F]; + state[1][2] = aes_invsbox[state[1][2] >> 4][state[1][2] & 0x0F]; + state[1][3] = aes_invsbox[state[1][3] >> 4][state[1][3] & 0x0F]; + state[2][0] = aes_invsbox[state[2][0] >> 4][state[2][0] & 0x0F]; + state[2][1] = aes_invsbox[state[2][1] >> 4][state[2][1] & 0x0F]; + state[2][2] = aes_invsbox[state[2][2] >> 4][state[2][2] & 0x0F]; + state[2][3] = aes_invsbox[state[2][3] >> 4][state[2][3] & 0x0F]; + state[3][0] = aes_invsbox[state[3][0] >> 4][state[3][0] & 0x0F]; + state[3][1] = aes_invsbox[state[3][1] >> 4][state[3][1] & 0x0F]; + state[3][2] = aes_invsbox[state[3][2] >> 4][state[3][2] & 0x0F]; + state[3][3] = aes_invsbox[state[3][3] >> 4][state[3][3] & 0x0F]; +} + +///////////////// +// (Inv)ShiftRows +///////////////// + +// Performs the ShiftRows step. All rows are shifted cylindrically to the left. +void ShiftRows(BYTE state[][4]) +{ + int t; + + // Shift left by 1 + t = state[1][0]; + state[1][0] = state[1][1]; + state[1][1] = state[1][2]; + state[1][2] = state[1][3]; + state[1][3] = t; + // Shift left by 2 + t = state[2][0]; + state[2][0] = state[2][2]; + state[2][2] = t; + t = state[2][1]; + state[2][1] = state[2][3]; + state[2][3] = t; + // Shift left by 3 + t = state[3][0]; + state[3][0] = state[3][3]; + state[3][3] = state[3][2]; + state[3][2] = state[3][1]; + state[3][1] = t; +} + +// All rows are shifted cylindrically to the right. +void InvShiftRows(BYTE state[][4]) +{ + int t; + + // Shift right by 1 + t = state[1][3]; + state[1][3] = state[1][2]; + state[1][2] = state[1][1]; + state[1][1] = state[1][0]; + state[1][0] = t; + // Shift right by 2 + t = state[2][3]; + state[2][3] = state[2][1]; + state[2][1] = t; + t = state[2][2]; + state[2][2] = state[2][0]; + state[2][0] = t; + // Shift right by 3 + t = state[3][3]; + state[3][3] = state[3][0]; + state[3][0] = state[3][1]; + state[3][1] = state[3][2]; + state[3][2] = t; +} + +///////////////// +// (Inv)MixColumns +///////////////// + +// Performs the MixColums step. The state is multiplied by itself using matrix +// multiplication in a Galios Field 2^8. All multiplication is pre-computed in a table. +// Addition is equivilent to XOR. (Must always make a copy of the column as the original +// values will be destoyed.) +void MixColumns(BYTE state[][4]) +{ + BYTE col[4]; + + // Column 1 + col[0] = state[0][0]; + col[1] = state[1][0]; + col[2] = state[2][0]; + col[3] = state[3][0]; + state[0][0] = gf_mul[col[0]][0]; + state[0][0] ^= gf_mul[col[1]][1]; + state[0][0] ^= col[2]; + state[0][0] ^= col[3]; + state[1][0] = col[0]; + state[1][0] ^= gf_mul[col[1]][0]; + state[1][0] ^= gf_mul[col[2]][1]; + state[1][0] ^= col[3]; + state[2][0] = col[0]; + state[2][0] ^= col[1]; + state[2][0] ^= gf_mul[col[2]][0]; + state[2][0] ^= gf_mul[col[3]][1]; + state[3][0] = gf_mul[col[0]][1]; + state[3][0] ^= col[1]; + state[3][0] ^= col[2]; + state[3][0] ^= gf_mul[col[3]][0]; + // Column 2 + col[0] = state[0][1]; + col[1] = state[1][1]; + col[2] = state[2][1]; + col[3] = state[3][1]; + state[0][1] = gf_mul[col[0]][0]; + state[0][1] ^= gf_mul[col[1]][1]; + state[0][1] ^= col[2]; + state[0][1] ^= col[3]; + state[1][1] = col[0]; + state[1][1] ^= gf_mul[col[1]][0]; + state[1][1] ^= gf_mul[col[2]][1]; + state[1][1] ^= col[3]; + state[2][1] = col[0]; + state[2][1] ^= col[1]; + state[2][1] ^= gf_mul[col[2]][0]; + state[2][1] ^= gf_mul[col[3]][1]; + state[3][1] = gf_mul[col[0]][1]; + state[3][1] ^= col[1]; + state[3][1] ^= col[2]; + state[3][1] ^= gf_mul[col[3]][0]; + // Column 3 + col[0] = state[0][2]; + col[1] = state[1][2]; + col[2] = state[2][2]; + col[3] = state[3][2]; + state[0][2] = gf_mul[col[0]][0]; + state[0][2] ^= gf_mul[col[1]][1]; + state[0][2] ^= col[2]; + state[0][2] ^= col[3]; + state[1][2] = col[0]; + state[1][2] ^= gf_mul[col[1]][0]; + state[1][2] ^= gf_mul[col[2]][1]; + state[1][2] ^= col[3]; + state[2][2] = col[0]; + state[2][2] ^= col[1]; + state[2][2] ^= gf_mul[col[2]][0]; + state[2][2] ^= gf_mul[col[3]][1]; + state[3][2] = gf_mul[col[0]][1]; + state[3][2] ^= col[1]; + state[3][2] ^= col[2]; + state[3][2] ^= gf_mul[col[3]][0]; + // Column 4 + col[0] = state[0][3]; + col[1] = state[1][3]; + col[2] = state[2][3]; + col[3] = state[3][3]; + state[0][3] = gf_mul[col[0]][0]; + state[0][3] ^= gf_mul[col[1]][1]; + state[0][3] ^= col[2]; + state[0][3] ^= col[3]; + state[1][3] = col[0]; + state[1][3] ^= gf_mul[col[1]][0]; + state[1][3] ^= gf_mul[col[2]][1]; + state[1][3] ^= col[3]; + state[2][3] = col[0]; + state[2][3] ^= col[1]; + state[2][3] ^= gf_mul[col[2]][0]; + state[2][3] ^= gf_mul[col[3]][1]; + state[3][3] = gf_mul[col[0]][1]; + state[3][3] ^= col[1]; + state[3][3] ^= col[2]; + state[3][3] ^= gf_mul[col[3]][0]; +} + +void InvMixColumns(BYTE state[][4]) +{ + BYTE col[4]; + + // Column 1 + col[0] = state[0][0]; + col[1] = state[1][0]; + col[2] = state[2][0]; + col[3] = state[3][0]; + state[0][0] = gf_mul[col[0]][5]; + state[0][0] ^= gf_mul[col[1]][3]; + state[0][0] ^= gf_mul[col[2]][4]; + state[0][0] ^= gf_mul[col[3]][2]; + state[1][0] = gf_mul[col[0]][2]; + state[1][0] ^= gf_mul[col[1]][5]; + state[1][0] ^= gf_mul[col[2]][3]; + state[1][0] ^= gf_mul[col[3]][4]; + state[2][0] = gf_mul[col[0]][4]; + state[2][0] ^= gf_mul[col[1]][2]; + state[2][0] ^= gf_mul[col[2]][5]; + state[2][0] ^= gf_mul[col[3]][3]; + state[3][0] = gf_mul[col[0]][3]; + state[3][0] ^= gf_mul[col[1]][4]; + state[3][0] ^= gf_mul[col[2]][2]; + state[3][0] ^= gf_mul[col[3]][5]; + // Column 2 + col[0] = state[0][1]; + col[1] = state[1][1]; + col[2] = state[2][1]; + col[3] = state[3][1]; + state[0][1] = gf_mul[col[0]][5]; + state[0][1] ^= gf_mul[col[1]][3]; + state[0][1] ^= gf_mul[col[2]][4]; + state[0][1] ^= gf_mul[col[3]][2]; + state[1][1] = gf_mul[col[0]][2]; + state[1][1] ^= gf_mul[col[1]][5]; + state[1][1] ^= gf_mul[col[2]][3]; + state[1][1] ^= gf_mul[col[3]][4]; + state[2][1] = gf_mul[col[0]][4]; + state[2][1] ^= gf_mul[col[1]][2]; + state[2][1] ^= gf_mul[col[2]][5]; + state[2][1] ^= gf_mul[col[3]][3]; + state[3][1] = gf_mul[col[0]][3]; + state[3][1] ^= gf_mul[col[1]][4]; + state[3][1] ^= gf_mul[col[2]][2]; + state[3][1] ^= gf_mul[col[3]][5]; + // Column 3 + col[0] = state[0][2]; + col[1] = state[1][2]; + col[2] = state[2][2]; + col[3] = state[3][2]; + state[0][2] = gf_mul[col[0]][5]; + state[0][2] ^= gf_mul[col[1]][3]; + state[0][2] ^= gf_mul[col[2]][4]; + state[0][2] ^= gf_mul[col[3]][2]; + state[1][2] = gf_mul[col[0]][2]; + state[1][2] ^= gf_mul[col[1]][5]; + state[1][2] ^= gf_mul[col[2]][3]; + state[1][2] ^= gf_mul[col[3]][4]; + state[2][2] = gf_mul[col[0]][4]; + state[2][2] ^= gf_mul[col[1]][2]; + state[2][2] ^= gf_mul[col[2]][5]; + state[2][2] ^= gf_mul[col[3]][3]; + state[3][2] = gf_mul[col[0]][3]; + state[3][2] ^= gf_mul[col[1]][4]; + state[3][2] ^= gf_mul[col[2]][2]; + state[3][2] ^= gf_mul[col[3]][5]; + // Column 4 + col[0] = state[0][3]; + col[1] = state[1][3]; + col[2] = state[2][3]; + col[3] = state[3][3]; + state[0][3] = gf_mul[col[0]][5]; + state[0][3] ^= gf_mul[col[1]][3]; + state[0][3] ^= gf_mul[col[2]][4]; + state[0][3] ^= gf_mul[col[3]][2]; + state[1][3] = gf_mul[col[0]][2]; + state[1][3] ^= gf_mul[col[1]][5]; + state[1][3] ^= gf_mul[col[2]][3]; + state[1][3] ^= gf_mul[col[3]][4]; + state[2][3] = gf_mul[col[0]][4]; + state[2][3] ^= gf_mul[col[1]][2]; + state[2][3] ^= gf_mul[col[2]][5]; + state[2][3] ^= gf_mul[col[3]][3]; + state[3][3] = gf_mul[col[0]][3]; + state[3][3] ^= gf_mul[col[1]][4]; + state[3][3] ^= gf_mul[col[2]][2]; + state[3][3] ^= gf_mul[col[3]][5]; +} + +///////////////// +// (En/De)Crypt +///////////////// + +void aes_encrypt(const BYTE in[], BYTE out[], const WORD key[], int keysize) +{ + BYTE state[4][4]; + + // Copy input array (should be 16 bytes long) to a matrix (sequential bytes are ordered + // by row, not col) called "state" for processing. + // *** Implementation note: The official AES documentation references the state by + // column, then row. Accessing an element in C requires row then column. Thus, all state + // references in AES must have the column and row indexes reversed for C implementation. + state[0][0] = in[0]; + state[1][0] = in[1]; + state[2][0] = in[2]; + state[3][0] = in[3]; + state[0][1] = in[4]; + state[1][1] = in[5]; + state[2][1] = in[6]; + state[3][1] = in[7]; + state[0][2] = in[8]; + state[1][2] = in[9]; + state[2][2] = in[10]; + state[3][2] = in[11]; + state[0][3] = in[12]; + state[1][3] = in[13]; + state[2][3] = in[14]; + state[3][3] = in[15]; + + // Perform the necessary number of rounds. The round key is added first. + // The last round does not perform the MixColumns step. + AddRoundKey(state,&key[0]); + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[4]); + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[8]); + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[12]); + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[16]); + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[20]); + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[24]); + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[28]); + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[32]); + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[36]); + if (keysize != 128) { + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[40]); + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[44]); + if (keysize != 192) { + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[48]); + SubBytes(state); ShiftRows(state); MixColumns(state); AddRoundKey(state,&key[52]); + SubBytes(state); ShiftRows(state); AddRoundKey(state,&key[56]); + } + else { + SubBytes(state); ShiftRows(state); AddRoundKey(state,&key[48]); + } + } + else { + SubBytes(state); ShiftRows(state); AddRoundKey(state,&key[40]); + } + + // Copy the state to the output array. + out[0] = state[0][0]; + out[1] = state[1][0]; + out[2] = state[2][0]; + out[3] = state[3][0]; + out[4] = state[0][1]; + out[5] = state[1][1]; + out[6] = state[2][1]; + out[7] = state[3][1]; + out[8] = state[0][2]; + out[9] = state[1][2]; + out[10] = state[2][2]; + out[11] = state[3][2]; + out[12] = state[0][3]; + out[13] = state[1][3]; + out[14] = state[2][3]; + out[15] = state[3][3]; +} + +void aes_decrypt(const BYTE in[], BYTE out[], const WORD key[], int keysize) +{ + BYTE state[4][4]; + + // Copy the input to the state. + state[0][0] = in[0]; + state[1][0] = in[1]; + state[2][0] = in[2]; + state[3][0] = in[3]; + state[0][1] = in[4]; + state[1][1] = in[5]; + state[2][1] = in[6]; + state[3][1] = in[7]; + state[0][2] = in[8]; + state[1][2] = in[9]; + state[2][2] = in[10]; + state[3][2] = in[11]; + state[0][3] = in[12]; + state[1][3] = in[13]; + state[2][3] = in[14]; + state[3][3] = in[15]; + + // Perform the necessary number of rounds. The round key is added first. + // The last round does not perform the MixColumns step. + if (keysize > 128) { + if (keysize > 192) { + AddRoundKey(state,&key[56]); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[52]);InvMixColumns(state); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[48]);InvMixColumns(state); + } + else { + AddRoundKey(state,&key[48]); + } + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[44]);InvMixColumns(state); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[40]);InvMixColumns(state); + } + else { + AddRoundKey(state,&key[40]); + } + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[36]);InvMixColumns(state); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[32]);InvMixColumns(state); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[28]);InvMixColumns(state); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[24]);InvMixColumns(state); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[20]);InvMixColumns(state); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[16]);InvMixColumns(state); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[12]);InvMixColumns(state); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[8]);InvMixColumns(state); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[4]);InvMixColumns(state); + InvShiftRows(state);InvSubBytes(state);AddRoundKey(state,&key[0]); + + // Copy the state to the output array. + out[0] = state[0][0]; + out[1] = state[1][0]; + out[2] = state[2][0]; + out[3] = state[3][0]; + out[4] = state[0][1]; + out[5] = state[1][1]; + out[6] = state[2][1]; + out[7] = state[3][1]; + out[8] = state[0][2]; + out[9] = state[1][2]; + out[10] = state[2][2]; + out[11] = state[3][2]; + out[12] = state[0][3]; + out[13] = state[1][3]; + out[14] = state[2][3]; + out[15] = state[3][3]; +} + +/******************* +** AES DEBUGGING FUNCTIONS +*******************/ +/* +// This prints the "state" grid as a linear hex string. +void print_state(BYTE state[][4]) +{ + int idx,idx2; + + for (idx=0; idx < 4; idx++) + for (idx2=0; idx2 < 4; idx2++) + printf("%02x",state[idx2][idx]); + printf("\n"); +} + +// This prints the key (4 consecutive ints) used for a given round as a linear hex string. +void print_rnd_key(WORD key[]) +{ + int idx; + + for (idx=0; idx < 4; idx++) + printf("%08x",key[idx]); + printf("\n"); +} +*/ diff --git a/cores/saturn/deps/crypto/aes.h b/cores/saturn/deps/crypto/aes.h new file mode 100644 index 000000000..e7d161439 --- /dev/null +++ b/cores/saturn/deps/crypto/aes.h @@ -0,0 +1,123 @@ +/********************************************************************* +* Filename: aes.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding AES implementation. +*********************************************************************/ + +#ifndef AES_H +#define AES_H + +/*************************** HEADER FILES ***************************/ +#include + +/****************************** MACROS ******************************/ +#define AES_BLOCK_SIZE 16 // AES operates on 16 bytes at a time + +/**************************** DATA TYPES ****************************/ +typedef unsigned char BYTE; // 8-bit byte +typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines + +/*********************** FUNCTION DECLARATIONS **********************/ +/////////////////// +// AES +/////////////////// +// Key setup must be done before any AES en/de-cryption functions can be used. +void aes_key_setup(const BYTE key[], // The key, must be 128, 192, or 256 bits + WORD w[], // Output key schedule to be used later + int keysize); // Bit length of the key, 128, 192, or 256 + +void aes_encrypt(const BYTE in[], // 16 bytes of plaintext + BYTE out[], // 16 bytes of ciphertext + const WORD key[], // From the key setup + int keysize); // Bit length of the key, 128, 192, or 256 + +void aes_decrypt(const BYTE in[], // 16 bytes of ciphertext + BYTE out[], // 16 bytes of plaintext + const WORD key[], // From the key setup + int keysize); // Bit length of the key, 128, 192, or 256 + +/////////////////// +// AES - CBC +/////////////////// +int aes_encrypt_cbc(const BYTE in[], // Plaintext + size_t in_len, // Must be a multiple of AES_BLOCK_SIZE + BYTE out[], // Ciphertext, same length as plaintext + const WORD key[], // From the key setup + int keysize, // Bit length of the key, 128, 192, or 256 + const BYTE iv[]); // IV, must be AES_BLOCK_SIZE bytes long + +// Only output the CBC-MAC of the input. +int aes_encrypt_cbc_mac(const BYTE in[], // plaintext + size_t in_len, // Must be a multiple of AES_BLOCK_SIZE + BYTE out[], // Output MAC + const WORD key[], // From the key setup + int keysize, // Bit length of the key, 128, 192, or 256 + const BYTE iv[]); // IV, must be AES_BLOCK_SIZE bytes long + +/////////////////// +// AES - CTR +/////////////////// +void increment_iv(BYTE iv[], // Must be a multiple of AES_BLOCK_SIZE + int counter_size); // Bytes of the IV used for counting (low end) + +void aes_encrypt_ctr(const BYTE in[], // Plaintext + size_t in_len, // Any byte length + BYTE out[], // Ciphertext, same length as plaintext + const WORD key[], // From the key setup + int keysize, // Bit length of the key, 128, 192, or 256 + const BYTE iv[]); // IV, must be AES_BLOCK_SIZE bytes long + +void aes_decrypt_ctr(const BYTE in[], // Ciphertext + size_t in_len, // Any byte length + BYTE out[], // Plaintext, same length as ciphertext + const WORD key[], // From the key setup + int keysize, // Bit length of the key, 128, 192, or 256 + const BYTE iv[]); // IV, must be AES_BLOCK_SIZE bytes long + +/////////////////// +// AES - CCM +/////////////////// +// Returns True if the input parameters do not violate any constraint. +int aes_encrypt_ccm(const BYTE plaintext[], // IN - Plaintext. + WORD plaintext_len, // IN - Plaintext length. + const BYTE associated_data[], // IN - Associated Data included in authentication, but not encryption. + unsigned short associated_data_len, // IN - Associated Data length in bytes. + const BYTE nonce[], // IN - The Nonce to be used for encryption. + unsigned short nonce_len, // IN - Nonce length in bytes. + BYTE ciphertext[], // OUT - Ciphertext, a concatination of the plaintext and the MAC. + WORD *ciphertext_len, // OUT - The length of the ciphertext, always plaintext_len + mac_len. + WORD mac_len, // IN - The desired length of the MAC, must be 4, 6, 8, 10, 12, 14, or 16. + const BYTE key[], // IN - The AES key for encryption. + int keysize); // IN - The length of the key in bits. Valid values are 128, 192, 256. + +// Returns True if the input parameters do not violate any constraint. +// Use mac_auth to ensure decryption/validation was preformed correctly. +// If authentication does not succeed, the plaintext is zeroed out. To overwride +// this, call with mac_auth = NULL. The proper proceedure is to decrypt with +// authentication enabled (mac_auth != NULL) and make a second call to that +// ignores authentication explicitly if the first call failes. +int aes_decrypt_ccm(const BYTE ciphertext[], // IN - Ciphertext, the concatination of encrypted plaintext and MAC. + WORD ciphertext_len, // IN - Ciphertext length in bytes. + const BYTE assoc[], // IN - The Associated Data, required for authentication. + unsigned short assoc_len, // IN - Associated Data length in bytes. + const BYTE nonce[], // IN - The Nonce to use for decryption, same one as for encryption. + unsigned short nonce_len, // IN - Nonce length in bytes. + BYTE plaintext[], // OUT - The plaintext that was decrypted. Will need to be large enough to hold ciphertext_len - mac_len. + WORD *plaintext_len, // OUT - Length in bytes of the output plaintext, always ciphertext_len - mac_len . + WORD mac_len, // IN - The length of the MAC that was calculated. + int *mac_auth, // OUT - TRUE if authentication succeeded, FALSE if it did not. NULL pointer will ignore the authentication. + const BYTE key[], // IN - The AES key for decryption. + int keysize); // IN - The length of the key in BITS. Valid values are 128, 192, 256. + +/////////////////// +// Test functions +/////////////////// +int aes_test(); +int aes_ecb_test(); +int aes_cbc_test(); +int aes_ctr_test(); +int aes_ccm_test(); + +#endif // AES_H diff --git a/cores/saturn/deps/crypto/arcfour.c b/cores/saturn/deps/crypto/arcfour.c new file mode 100644 index 000000000..37a88e104 --- /dev/null +++ b/cores/saturn/deps/crypto/arcfour.c @@ -0,0 +1,47 @@ +/********************************************************************* +* Filename: arcfour.c +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Implementation of the ARCFOUR encryption algorithm. + Algorithm specification can be found here: + * http://en.wikipedia.org/wiki/RC4 +*********************************************************************/ + +/*************************** HEADER FILES ***************************/ +#include +#include "arcfour.h" + +/*********************** FUNCTION DEFINITIONS ***********************/ +void arcfour_key_setup(BYTE state[], const BYTE key[], int len) +{ + int i, j; + BYTE t; + + for (i = 0; i < 256; ++i) + state[i] = i; + for (i = 0, j = 0; i < 256; ++i) { + j = (j + state[i] + key[i % len]) % 256; + t = state[i]; + state[i] = state[j]; + state[j] = t; + } +} + +// This does not hold state between calls. It always generates the +// stream starting from the first output byte. +void arcfour_generate_stream(BYTE state[], BYTE out[], size_t len) +{ + int i, j; + size_t idx; + BYTE t; + + for (idx = 0, i = 0, j = 0; idx < len; ++idx) { + i = (i + 1) % 256; + j = (j + state[i]) % 256; + t = state[i]; + state[i] = state[j]; + state[j] = t; + out[idx] = state[(state[i] + state[j]) % 256]; + } +} diff --git a/cores/saturn/deps/crypto/arcfour.h b/cores/saturn/deps/crypto/arcfour.h new file mode 100644 index 000000000..f9f1e87df --- /dev/null +++ b/cores/saturn/deps/crypto/arcfour.h @@ -0,0 +1,30 @@ +/********************************************************************* +* Filename: arcfour.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding ARCFOUR implementation. +*********************************************************************/ + +#ifndef ARCFOUR_H +#define ARCFOUR_H + +/*************************** HEADER FILES ***************************/ +#include + +/**************************** DATA TYPES ****************************/ +typedef unsigned char BYTE; // 8-bit byte + +/*********************** FUNCTION DECLARATIONS **********************/ +// Input: state - the state used to generate the keystream +// key - Key to use to initialize the state +// len - length of key in bytes (valid lenth is 1 to 256) +void arcfour_key_setup(BYTE state[], const BYTE key[], int len); + +// Pseudo-Random Generator Algorithm +// Input: state - the state used to generate the keystream +// out - Must be allocated to be of at least "len" length +// len - number of bytes to generate +void arcfour_generate_stream(BYTE state[], BYTE out[], size_t len); + +#endif // ARCFOUR_H diff --git a/cores/saturn/deps/crypto/base64.c b/cores/saturn/deps/crypto/base64.c new file mode 100644 index 000000000..5e89808cb --- /dev/null +++ b/cores/saturn/deps/crypto/base64.c @@ -0,0 +1,135 @@ +/********************************************************************* +* Filename: base64.c +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Implementation of the Base64 encoding algorithm. +*********************************************************************/ + +/*************************** HEADER FILES ***************************/ +#include +#include "base64.h" + +/****************************** MACROS ******************************/ +#define NEWLINE_INVL 76 + +/**************************** VARIABLES *****************************/ +// Note: To change the charset to a URL encoding, replace the '+' and '/' with '*' and '-' +static const BYTE charset[]={"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"}; + +/*********************** FUNCTION DEFINITIONS ***********************/ +BYTE revchar(char ch) +{ + if (ch >= 'A' && ch <= 'Z') + ch -= 'A'; + else if (ch >= 'a' && ch <='z') + ch = ch - 'a' + 26; + else if (ch >= '0' && ch <='9') + ch = ch - '0' + 52; + else if (ch == '+') + ch = 62; + else if (ch == '/') + ch = 63; + + return(ch); +} + +size_t base64_encode(const BYTE in[], BYTE out[], size_t len, int newline_flag) +{ + size_t idx, idx2, blks, blk_ceiling, left_over, newline_count = 0; + + blks = (len / 3); + left_over = len % 3; + + if (out == NULL) { + idx2 = blks * 4 ; + if (left_over) + idx2 += 4; + if (newline_flag) + idx2 += len / 57; // (NEWLINE_INVL / 4) * 3 = 57. One newline per 57 input bytes. + } + else { + // Since 3 input bytes = 4 output bytes, determine out how many even sets of + // 3 bytes the input has. + blk_ceiling = blks * 3; + for (idx = 0, idx2 = 0; idx < blk_ceiling; idx += 3, idx2 += 4) { + out[idx2] = charset[in[idx] >> 2]; + out[idx2 + 1] = charset[((in[idx] & 0x03) << 4) | (in[idx + 1] >> 4)]; + out[idx2 + 2] = charset[((in[idx + 1] & 0x0f) << 2) | (in[idx + 2] >> 6)]; + out[idx2 + 3] = charset[in[idx + 2] & 0x3F]; + // The offical standard requires a newline every 76 characters. + // (Eg, first newline is character 77 of the output.) + if (((idx2 - newline_count + 4) % NEWLINE_INVL == 0) && newline_flag) { + out[idx2 + 4] = '\n'; + idx2++; + newline_count++; + } + } + + if (left_over == 1) { + out[idx2] = charset[in[idx] >> 2]; + out[idx2 + 1] = charset[(in[idx] & 0x03) << 4]; + out[idx2 + 2] = '='; + out[idx2 + 3] = '='; + idx2 += 4; + } + else if (left_over == 2) { + out[idx2] = charset[in[idx] >> 2]; + out[idx2 + 1] = charset[((in[idx] & 0x03) << 4) | (in[idx + 1] >> 4)]; + out[idx2 + 2] = charset[(in[idx + 1] & 0x0F) << 2]; + out[idx2 + 3] = '='; + idx2 += 4; + } + } + + return(idx2); +} + +size_t base64_decode(const BYTE in[], BYTE out[], size_t len) +{ + BYTE ch; + size_t idx, idx2, blks, blk_ceiling, left_over; + + if (in[len - 1] == '=') + len--; + if (in[len - 1] == '=') + len--; + + blks = len / 4; + left_over = len % 4; + + if (out == NULL) { + if (len >= 77 && in[NEWLINE_INVL] == '\n') // Verify that newlines where used. + len -= len / (NEWLINE_INVL + 1); + blks = len / 4; + left_over = len % 4; + + idx = blks * 3; + if (left_over == 2) + idx ++; + else if (left_over == 3) + idx += 2; + } + else { + blk_ceiling = blks * 4; + for (idx = 0, idx2 = 0; idx2 < blk_ceiling; idx += 3, idx2 += 4) { + if (in[idx2] == '\n') + idx2++; + out[idx] = (revchar(in[idx2]) << 2) | ((revchar(in[idx2 + 1]) & 0x30) >> 4); + out[idx + 1] = (revchar(in[idx2 + 1]) << 4) | (revchar(in[idx2 + 2]) >> 2); + out[idx + 2] = (revchar(in[idx2 + 2]) << 6) | revchar(in[idx2 + 3]); + } + + if (left_over == 2) { + out[idx] = (revchar(in[idx2]) << 2) | ((revchar(in[idx2 + 1]) & 0x30) >> 4); + idx++; + } + else if (left_over == 3) { + out[idx] = (revchar(in[idx2]) << 2) | ((revchar(in[idx2 + 1]) & 0x30) >> 4); + out[idx + 1] = (revchar(in[idx2 + 1]) << 4) | (revchar(in[idx2 + 2]) >> 2); + idx += 2; + } + } + + return(idx); +} diff --git a/cores/saturn/deps/crypto/base64.h b/cores/saturn/deps/crypto/base64.h new file mode 100644 index 000000000..e35c6c7d9 --- /dev/null +++ b/cores/saturn/deps/crypto/base64.h @@ -0,0 +1,27 @@ +/********************************************************************* +* Filename: base64.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding Base64 implementation. +*********************************************************************/ + +#ifndef BASE64_H +#define BASE64_H + +/*************************** HEADER FILES ***************************/ +#include + +/**************************** DATA TYPES ****************************/ +typedef unsigned char BYTE; // 8-bit byte + +/*********************** FUNCTION DECLARATIONS **********************/ +// Returns the size of the output. If called with out = NULL, will just return +// the size of what the output would have been (without a terminating NULL). +size_t base64_encode(const BYTE in[], BYTE out[], size_t len, int newline_flag); + +// Returns the size of the output. If called with out = NULL, will just return +// the size of what the output would have been (without a terminating NULL). +size_t base64_decode(const BYTE in[], BYTE out[], size_t len); + +#endif // BASE64_H diff --git a/cores/saturn/deps/crypto/blowfish.c b/cores/saturn/deps/crypto/blowfish.c new file mode 100644 index 000000000..8de91ddbb --- /dev/null +++ b/cores/saturn/deps/crypto/blowfish.c @@ -0,0 +1,269 @@ +/********************************************************************* +* Filename: blowfish.c +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Implementation of the Blowfish encryption algorithm. + Modes of operation (such as CBC) are not included. + Algorithm specification can be found here: + * http://www.schneier.com/blowfish.html +*********************************************************************/ + +/*************************** HEADER FILES ***************************/ +#include +#include +#include "blowfish.h" + +/****************************** MACROS ******************************/ +#define F(x,t) t = keystruct->s[0][(x) >> 24]; \ + t += keystruct->s[1][((x) >> 16) & 0xff]; \ + t ^= keystruct->s[2][((x) >> 8) & 0xff]; \ + t += keystruct->s[3][(x) & 0xff]; +#define swap(r,l,t) t = l; l = r; r = t; +#define ITERATION(l,r,t,pval) l ^= keystruct->p[pval]; F(l,t); r^= t; swap(r,l,t); + +/**************************** VARIABLES *****************************/ +static const WORD p_perm[18] = { + 0x243F6A88,0x85A308D3,0x13198A2E,0x03707344,0xA4093822,0x299F31D0,0x082EFA98, + 0xEC4E6C89,0x452821E6,0x38D01377,0xBE5466CF,0x34E90C6C,0xC0AC29B7,0xC97C50DD, + 0x3F84D5B5,0xB5470917,0x9216D5D9,0x8979FB1B +}; + +static const WORD s_perm[4][256] = { { + 0xD1310BA6,0x98DFB5AC,0x2FFD72DB,0xD01ADFB7,0xB8E1AFED,0x6A267E96,0xBA7C9045,0xF12C7F99, + 0x24A19947,0xB3916CF7,0x0801F2E2,0x858EFC16,0x636920D8,0x71574E69,0xA458FEA3,0xF4933D7E, + 0x0D95748F,0x728EB658,0x718BCD58,0x82154AEE,0x7B54A41D,0xC25A59B5,0x9C30D539,0x2AF26013, + 0xC5D1B023,0x286085F0,0xCA417918,0xB8DB38EF,0x8E79DCB0,0x603A180E,0x6C9E0E8B,0xB01E8A3E, + 0xD71577C1,0xBD314B27,0x78AF2FDA,0x55605C60,0xE65525F3,0xAA55AB94,0x57489862,0x63E81440, + 0x55CA396A,0x2AAB10B6,0xB4CC5C34,0x1141E8CE,0xA15486AF,0x7C72E993,0xB3EE1411,0x636FBC2A, + 0x2BA9C55D,0x741831F6,0xCE5C3E16,0x9B87931E,0xAFD6BA33,0x6C24CF5C,0x7A325381,0x28958677, + 0x3B8F4898,0x6B4BB9AF,0xC4BFE81B,0x66282193,0x61D809CC,0xFB21A991,0x487CAC60,0x5DEC8032, + 0xEF845D5D,0xE98575B1,0xDC262302,0xEB651B88,0x23893E81,0xD396ACC5,0x0F6D6FF3,0x83F44239, + 0x2E0B4482,0xA4842004,0x69C8F04A,0x9E1F9B5E,0x21C66842,0xF6E96C9A,0x670C9C61,0xABD388F0, + 0x6A51A0D2,0xD8542F68,0x960FA728,0xAB5133A3,0x6EEF0B6C,0x137A3BE4,0xBA3BF050,0x7EFB2A98, + 0xA1F1651D,0x39AF0176,0x66CA593E,0x82430E88,0x8CEE8619,0x456F9FB4,0x7D84A5C3,0x3B8B5EBE, + 0xE06F75D8,0x85C12073,0x401A449F,0x56C16AA6,0x4ED3AA62,0x363F7706,0x1BFEDF72,0x429B023D, + 0x37D0D724,0xD00A1248,0xDB0FEAD3,0x49F1C09B,0x075372C9,0x80991B7B,0x25D479D8,0xF6E8DEF7, + 0xE3FE501A,0xB6794C3B,0x976CE0BD,0x04C006BA,0xC1A94FB6,0x409F60C4,0x5E5C9EC2,0x196A2463, + 0x68FB6FAF,0x3E6C53B5,0x1339B2EB,0x3B52EC6F,0x6DFC511F,0x9B30952C,0xCC814544,0xAF5EBD09, + 0xBEE3D004,0xDE334AFD,0x660F2807,0x192E4BB3,0xC0CBA857,0x45C8740F,0xD20B5F39,0xB9D3FBDB, + 0x5579C0BD,0x1A60320A,0xD6A100C6,0x402C7279,0x679F25FE,0xFB1FA3CC,0x8EA5E9F8,0xDB3222F8, + 0x3C7516DF,0xFD616B15,0x2F501EC8,0xAD0552AB,0x323DB5FA,0xFD238760,0x53317B48,0x3E00DF82, + 0x9E5C57BB,0xCA6F8CA0,0x1A87562E,0xDF1769DB,0xD542A8F6,0x287EFFC3,0xAC6732C6,0x8C4F5573, + 0x695B27B0,0xBBCA58C8,0xE1FFA35D,0xB8F011A0,0x10FA3D98,0xFD2183B8,0x4AFCB56C,0x2DD1D35B, + 0x9A53E479,0xB6F84565,0xD28E49BC,0x4BFB9790,0xE1DDF2DA,0xA4CB7E33,0x62FB1341,0xCEE4C6E8, + 0xEF20CADA,0x36774C01,0xD07E9EFE,0x2BF11FB4,0x95DBDA4D,0xAE909198,0xEAAD8E71,0x6B93D5A0, + 0xD08ED1D0,0xAFC725E0,0x8E3C5B2F,0x8E7594B7,0x8FF6E2FB,0xF2122B64,0x8888B812,0x900DF01C, + 0x4FAD5EA0,0x688FC31C,0xD1CFF191,0xB3A8C1AD,0x2F2F2218,0xBE0E1777,0xEA752DFE,0x8B021FA1, + 0xE5A0CC0F,0xB56F74E8,0x18ACF3D6,0xCE89E299,0xB4A84FE0,0xFD13E0B7,0x7CC43B81,0xD2ADA8D9, + 0x165FA266,0x80957705,0x93CC7314,0x211A1477,0xE6AD2065,0x77B5FA86,0xC75442F5,0xFB9D35CF, + 0xEBCDAF0C,0x7B3E89A0,0xD6411BD3,0xAE1E7E49,0x00250E2D,0x2071B35E,0x226800BB,0x57B8E0AF, + 0x2464369B,0xF009B91E,0x5563911D,0x59DFA6AA,0x78C14389,0xD95A537F,0x207D5BA2,0x02E5B9C5, + 0x83260376,0x6295CFA9,0x11C81968,0x4E734A41,0xB3472DCA,0x7B14A94A,0x1B510052,0x9A532915, + 0xD60F573F,0xBC9BC6E4,0x2B60A476,0x81E67400,0x08BA6FB5,0x571BE91F,0xF296EC6B,0x2A0DD915, + 0xB6636521,0xE7B9F9B6,0xFF34052E,0xC5855664,0x53B02D5D,0xA99F8FA1,0x08BA4799,0x6E85076A +},{ + 0x4B7A70E9,0xB5B32944,0xDB75092E,0xC4192623,0xAD6EA6B0,0x49A7DF7D,0x9CEE60B8,0x8FEDB266, + 0xECAA8C71,0x699A17FF,0x5664526C,0xC2B19EE1,0x193602A5,0x75094C29,0xA0591340,0xE4183A3E, + 0x3F54989A,0x5B429D65,0x6B8FE4D6,0x99F73FD6,0xA1D29C07,0xEFE830F5,0x4D2D38E6,0xF0255DC1, + 0x4CDD2086,0x8470EB26,0x6382E9C6,0x021ECC5E,0x09686B3F,0x3EBAEFC9,0x3C971814,0x6B6A70A1, + 0x687F3584,0x52A0E286,0xB79C5305,0xAA500737,0x3E07841C,0x7FDEAE5C,0x8E7D44EC,0x5716F2B8, + 0xB03ADA37,0xF0500C0D,0xF01C1F04,0x0200B3FF,0xAE0CF51A,0x3CB574B2,0x25837A58,0xDC0921BD, + 0xD19113F9,0x7CA92FF6,0x94324773,0x22F54701,0x3AE5E581,0x37C2DADC,0xC8B57634,0x9AF3DDA7, + 0xA9446146,0x0FD0030E,0xECC8C73E,0xA4751E41,0xE238CD99,0x3BEA0E2F,0x3280BBA1,0x183EB331, + 0x4E548B38,0x4F6DB908,0x6F420D03,0xF60A04BF,0x2CB81290,0x24977C79,0x5679B072,0xBCAF89AF, + 0xDE9A771F,0xD9930810,0xB38BAE12,0xDCCF3F2E,0x5512721F,0x2E6B7124,0x501ADDE6,0x9F84CD87, + 0x7A584718,0x7408DA17,0xBC9F9ABC,0xE94B7D8C,0xEC7AEC3A,0xDB851DFA,0x63094366,0xC464C3D2, + 0xEF1C1847,0x3215D908,0xDD433B37,0x24C2BA16,0x12A14D43,0x2A65C451,0x50940002,0x133AE4DD, + 0x71DFF89E,0x10314E55,0x81AC77D6,0x5F11199B,0x043556F1,0xD7A3C76B,0x3C11183B,0x5924A509, + 0xF28FE6ED,0x97F1FBFA,0x9EBABF2C,0x1E153C6E,0x86E34570,0xEAE96FB1,0x860E5E0A,0x5A3E2AB3, + 0x771FE71C,0x4E3D06FA,0x2965DCB9,0x99E71D0F,0x803E89D6,0x5266C825,0x2E4CC978,0x9C10B36A, + 0xC6150EBA,0x94E2EA78,0xA5FC3C53,0x1E0A2DF4,0xF2F74EA7,0x361D2B3D,0x1939260F,0x19C27960, + 0x5223A708,0xF71312B6,0xEBADFE6E,0xEAC31F66,0xE3BC4595,0xA67BC883,0xB17F37D1,0x018CFF28, + 0xC332DDEF,0xBE6C5AA5,0x65582185,0x68AB9802,0xEECEA50F,0xDB2F953B,0x2AEF7DAD,0x5B6E2F84, + 0x1521B628,0x29076170,0xECDD4775,0x619F1510,0x13CCA830,0xEB61BD96,0x0334FE1E,0xAA0363CF, + 0xB5735C90,0x4C70A239,0xD59E9E0B,0xCBAADE14,0xEECC86BC,0x60622CA7,0x9CAB5CAB,0xB2F3846E, + 0x648B1EAF,0x19BDF0CA,0xA02369B9,0x655ABB50,0x40685A32,0x3C2AB4B3,0x319EE9D5,0xC021B8F7, + 0x9B540B19,0x875FA099,0x95F7997E,0x623D7DA8,0xF837889A,0x97E32D77,0x11ED935F,0x16681281, + 0x0E358829,0xC7E61FD6,0x96DEDFA1,0x7858BA99,0x57F584A5,0x1B227263,0x9B83C3FF,0x1AC24696, + 0xCDB30AEB,0x532E3054,0x8FD948E4,0x6DBC3128,0x58EBF2EF,0x34C6FFEA,0xFE28ED61,0xEE7C3C73, + 0x5D4A14D9,0xE864B7E3,0x42105D14,0x203E13E0,0x45EEE2B6,0xA3AAABEA,0xDB6C4F15,0xFACB4FD0, + 0xC742F442,0xEF6ABBB5,0x654F3B1D,0x41CD2105,0xD81E799E,0x86854DC7,0xE44B476A,0x3D816250, + 0xCF62A1F2,0x5B8D2646,0xFC8883A0,0xC1C7B6A3,0x7F1524C3,0x69CB7492,0x47848A0B,0x5692B285, + 0x095BBF00,0xAD19489D,0x1462B174,0x23820E00,0x58428D2A,0x0C55F5EA,0x1DADF43E,0x233F7061, + 0x3372F092,0x8D937E41,0xD65FECF1,0x6C223BDB,0x7CDE3759,0xCBEE7460,0x4085F2A7,0xCE77326E, + 0xA6078084,0x19F8509E,0xE8EFD855,0x61D99735,0xA969A7AA,0xC50C06C2,0x5A04ABFC,0x800BCADC, + 0x9E447A2E,0xC3453484,0xFDD56705,0x0E1E9EC9,0xDB73DBD3,0x105588CD,0x675FDA79,0xE3674340, + 0xC5C43465,0x713E38D8,0x3D28F89E,0xF16DFF20,0x153E21E7,0x8FB03D4A,0xE6E39F2B,0xDB83ADF7 +},{ + 0xE93D5A68,0x948140F7,0xF64C261C,0x94692934,0x411520F7,0x7602D4F7,0xBCF46B2E,0xD4A20068, + 0xD4082471,0x3320F46A,0x43B7D4B7,0x500061AF,0x1E39F62E,0x97244546,0x14214F74,0xBF8B8840, + 0x4D95FC1D,0x96B591AF,0x70F4DDD3,0x66A02F45,0xBFBC09EC,0x03BD9785,0x7FAC6DD0,0x31CB8504, + 0x96EB27B3,0x55FD3941,0xDA2547E6,0xABCA0A9A,0x28507825,0x530429F4,0x0A2C86DA,0xE9B66DFB, + 0x68DC1462,0xD7486900,0x680EC0A4,0x27A18DEE,0x4F3FFEA2,0xE887AD8C,0xB58CE006,0x7AF4D6B6, + 0xAACE1E7C,0xD3375FEC,0xCE78A399,0x406B2A42,0x20FE9E35,0xD9F385B9,0xEE39D7AB,0x3B124E8B, + 0x1DC9FAF7,0x4B6D1856,0x26A36631,0xEAE397B2,0x3A6EFA74,0xDD5B4332,0x6841E7F7,0xCA7820FB, + 0xFB0AF54E,0xD8FEB397,0x454056AC,0xBA489527,0x55533A3A,0x20838D87,0xFE6BA9B7,0xD096954B, + 0x55A867BC,0xA1159A58,0xCCA92963,0x99E1DB33,0xA62A4A56,0x3F3125F9,0x5EF47E1C,0x9029317C, + 0xFDF8E802,0x04272F70,0x80BB155C,0x05282CE3,0x95C11548,0xE4C66D22,0x48C1133F,0xC70F86DC, + 0x07F9C9EE,0x41041F0F,0x404779A4,0x5D886E17,0x325F51EB,0xD59BC0D1,0xF2BCC18F,0x41113564, + 0x257B7834,0x602A9C60,0xDFF8E8A3,0x1F636C1B,0x0E12B4C2,0x02E1329E,0xAF664FD1,0xCAD18115, + 0x6B2395E0,0x333E92E1,0x3B240B62,0xEEBEB922,0x85B2A20E,0xE6BA0D99,0xDE720C8C,0x2DA2F728, + 0xD0127845,0x95B794FD,0x647D0862,0xE7CCF5F0,0x5449A36F,0x877D48FA,0xC39DFD27,0xF33E8D1E, + 0x0A476341,0x992EFF74,0x3A6F6EAB,0xF4F8FD37,0xA812DC60,0xA1EBDDF8,0x991BE14C,0xDB6E6B0D, + 0xC67B5510,0x6D672C37,0x2765D43B,0xDCD0E804,0xF1290DC7,0xCC00FFA3,0xB5390F92,0x690FED0B, + 0x667B9FFB,0xCEDB7D9C,0xA091CF0B,0xD9155EA3,0xBB132F88,0x515BAD24,0x7B9479BF,0x763BD6EB, + 0x37392EB3,0xCC115979,0x8026E297,0xF42E312D,0x6842ADA7,0xC66A2B3B,0x12754CCC,0x782EF11C, + 0x6A124237,0xB79251E7,0x06A1BBE6,0x4BFB6350,0x1A6B1018,0x11CAEDFA,0x3D25BDD8,0xE2E1C3C9, + 0x44421659,0x0A121386,0xD90CEC6E,0xD5ABEA2A,0x64AF674E,0xDA86A85F,0xBEBFE988,0x64E4C3FE, + 0x9DBC8057,0xF0F7C086,0x60787BF8,0x6003604D,0xD1FD8346,0xF6381FB0,0x7745AE04,0xD736FCCC, + 0x83426B33,0xF01EAB71,0xB0804187,0x3C005E5F,0x77A057BE,0xBDE8AE24,0x55464299,0xBF582E61, + 0x4E58F48F,0xF2DDFDA2,0xF474EF38,0x8789BDC2,0x5366F9C3,0xC8B38E74,0xB475F255,0x46FCD9B9, + 0x7AEB2661,0x8B1DDF84,0x846A0E79,0x915F95E2,0x466E598E,0x20B45770,0x8CD55591,0xC902DE4C, + 0xB90BACE1,0xBB8205D0,0x11A86248,0x7574A99E,0xB77F19B6,0xE0A9DC09,0x662D09A1,0xC4324633, + 0xE85A1F02,0x09F0BE8C,0x4A99A025,0x1D6EFE10,0x1AB93D1D,0x0BA5A4DF,0xA186F20F,0x2868F169, + 0xDCB7DA83,0x573906FE,0xA1E2CE9B,0x4FCD7F52,0x50115E01,0xA70683FA,0xA002B5C4,0x0DE6D027, + 0x9AF88C27,0x773F8641,0xC3604C06,0x61A806B5,0xF0177A28,0xC0F586E0,0x006058AA,0x30DC7D62, + 0x11E69ED7,0x2338EA63,0x53C2DD94,0xC2C21634,0xBBCBEE56,0x90BCB6DE,0xEBFC7DA1,0xCE591D76, + 0x6F05E409,0x4B7C0188,0x39720A3D,0x7C927C24,0x86E3725F,0x724D9DB9,0x1AC15BB4,0xD39EB8FC, + 0xED545578,0x08FCA5B5,0xD83D7CD3,0x4DAD0FC4,0x1E50EF5E,0xB161E6F8,0xA28514D9,0x6C51133C, + 0x6FD5C7E7,0x56E14EC4,0x362ABFCE,0xDDC6C837,0xD79A3234,0x92638212,0x670EFA8E,0x406000E0 +},{ + 0x3A39CE37,0xD3FAF5CF,0xABC27737,0x5AC52D1B,0x5CB0679E,0x4FA33742,0xD3822740,0x99BC9BBE, + 0xD5118E9D,0xBF0F7315,0xD62D1C7E,0xC700C47B,0xB78C1B6B,0x21A19045,0xB26EB1BE,0x6A366EB4, + 0x5748AB2F,0xBC946E79,0xC6A376D2,0x6549C2C8,0x530FF8EE,0x468DDE7D,0xD5730A1D,0x4CD04DC6, + 0x2939BBDB,0xA9BA4650,0xAC9526E8,0xBE5EE304,0xA1FAD5F0,0x6A2D519A,0x63EF8CE2,0x9A86EE22, + 0xC089C2B8,0x43242EF6,0xA51E03AA,0x9CF2D0A4,0x83C061BA,0x9BE96A4D,0x8FE51550,0xBA645BD6, + 0x2826A2F9,0xA73A3AE1,0x4BA99586,0xEF5562E9,0xC72FEFD3,0xF752F7DA,0x3F046F69,0x77FA0A59, + 0x80E4A915,0x87B08601,0x9B09E6AD,0x3B3EE593,0xE990FD5A,0x9E34D797,0x2CF0B7D9,0x022B8B51, + 0x96D5AC3A,0x017DA67D,0xD1CF3ED6,0x7C7D2D28,0x1F9F25CF,0xADF2B89B,0x5AD6B472,0x5A88F54C, + 0xE029AC71,0xE019A5E6,0x47B0ACFD,0xED93FA9B,0xE8D3C48D,0x283B57CC,0xF8D56629,0x79132E28, + 0x785F0191,0xED756055,0xF7960E44,0xE3D35E8C,0x15056DD4,0x88F46DBA,0x03A16125,0x0564F0BD, + 0xC3EB9E15,0x3C9057A2,0x97271AEC,0xA93A072A,0x1B3F6D9B,0x1E6321F5,0xF59C66FB,0x26DCF319, + 0x7533D928,0xB155FDF5,0x03563482,0x8ABA3CBB,0x28517711,0xC20AD9F8,0xABCC5167,0xCCAD925F, + 0x4DE81751,0x3830DC8E,0x379D5862,0x9320F991,0xEA7A90C2,0xFB3E7BCE,0x5121CE64,0x774FBE32, + 0xA8B6E37E,0xC3293D46,0x48DE5369,0x6413E680,0xA2AE0810,0xDD6DB224,0x69852DFD,0x09072166, + 0xB39A460A,0x6445C0DD,0x586CDECF,0x1C20C8AE,0x5BBEF7DD,0x1B588D40,0xCCD2017F,0x6BB4E3BB, + 0xDDA26A7E,0x3A59FF45,0x3E350A44,0xBCB4CDD5,0x72EACEA8,0xFA6484BB,0x8D6612AE,0xBF3C6F47, + 0xD29BE463,0x542F5D9E,0xAEC2771B,0xF64E6370,0x740E0D8D,0xE75B1357,0xF8721671,0xAF537D5D, + 0x4040CB08,0x4EB4E2CC,0x34D2466A,0x0115AF84,0xE1B00428,0x95983A1D,0x06B89FB4,0xCE6EA048, + 0x6F3F3B82,0x3520AB82,0x011A1D4B,0x277227F8,0x611560B1,0xE7933FDC,0xBB3A792B,0x344525BD, + 0xA08839E1,0x51CE794B,0x2F32C9B7,0xA01FBAC9,0xE01CC87E,0xBCC7D1F6,0xCF0111C3,0xA1E8AAC7, + 0x1A908749,0xD44FBD9A,0xD0DADECB,0xD50ADA38,0x0339C32A,0xC6913667,0x8DF9317C,0xE0B12B4F, + 0xF79E59B7,0x43F5BB3A,0xF2D519FF,0x27D9459C,0xBF97222C,0x15E6FC2A,0x0F91FC71,0x9B941525, + 0xFAE59361,0xCEB69CEB,0xC2A86459,0x12BAA8D1,0xB6C1075E,0xE3056A0C,0x10D25065,0xCB03A442, + 0xE0EC6E0E,0x1698DB3B,0x4C98A0BE,0x3278E964,0x9F1F9532,0xE0D392DF,0xD3A0342B,0x8971F21E, + 0x1B0A7441,0x4BA3348C,0xC5BE7120,0xC37632D8,0xDF359F8D,0x9B992F2E,0xE60B6F47,0x0FE3F11D, + 0xE54CDA54,0x1EDAD891,0xCE6279CF,0xCD3E7E6F,0x1618B166,0xFD2C1D05,0x848FD2C5,0xF6FB2299, + 0xF523F357,0xA6327623,0x93A83531,0x56CCCD02,0xACF08162,0x5A75EBB5,0x6E163697,0x88D273CC, + 0xDE966292,0x81B949D0,0x4C50901B,0x71C65614,0xE6C6C7BD,0x327A140A,0x45E1D006,0xC3F27B9A, + 0xC9AA53FD,0x62A80F00,0xBB25BFE2,0x35BDD2F6,0x71126905,0xB2040222,0xB6CBCF7C,0xCD769C2B, + 0x53113EC0,0x1640E3D3,0x38ABBD60,0x2547ADF0,0xBA38209C,0xF746CE76,0x77AFA1C5,0x20756060, + 0x85CBFE4E,0x8AE88DD8,0x7AAAF9B0,0x4CF9AA7E,0x1948C25C,0x02FB8A8C,0x01C36AE4,0xD6EBE1F9, + 0x90D4F869,0xA65CDEA0,0x3F09252D,0xC208E69F,0xB74E6132,0xCE77E25B,0x578FDFE3,0x3AC372E6 +} }; + +/*********************** FUNCTION DEFINITIONS ***********************/ +void blowfish_encrypt(const BYTE in[], BYTE out[], const BLOWFISH_KEY *keystruct) +{ + WORD l,r,t; //,i; + + l = (in[0] << 24) | (in[1] << 16) | (in[2] << 8) | (in[3]); + r = (in[4] << 24) | (in[5] << 16) | (in[6] << 8) | (in[7]); + + ITERATION(l,r,t,0); + ITERATION(l,r,t,1); + ITERATION(l,r,t,2); + ITERATION(l,r,t,3); + ITERATION(l,r,t,4); + ITERATION(l,r,t,5); + ITERATION(l,r,t,6); + ITERATION(l,r,t,7); + ITERATION(l,r,t,8); + ITERATION(l,r,t,9); + ITERATION(l,r,t,10); + ITERATION(l,r,t,11); + ITERATION(l,r,t,12); + ITERATION(l,r,t,13); + ITERATION(l,r,t,14); + l ^= keystruct->p[15]; F(l,t); r^= t; //Last iteration has no swap() + r ^= keystruct->p[16]; + l ^= keystruct->p[17]; + + out[0] = l >> 24; + out[1] = l >> 16; + out[2] = l >> 8; + out[3] = l; + out[4] = r >> 24; + out[5] = r >> 16; + out[6] = r >> 8; + out[7] = r; +} + +void blowfish_decrypt(const BYTE in[], BYTE out[], const BLOWFISH_KEY *keystruct) +{ + WORD l,r,t; //,i; + + l = (in[0] << 24) | (in[1] << 16) | (in[2] << 8) | (in[3]); + r = (in[4] << 24) | (in[5] << 16) | (in[6] << 8) | (in[7]); + + ITERATION(l,r,t,17); + ITERATION(l,r,t,16); + ITERATION(l,r,t,15); + ITERATION(l,r,t,14); + ITERATION(l,r,t,13); + ITERATION(l,r,t,12); + ITERATION(l,r,t,11); + ITERATION(l,r,t,10); + ITERATION(l,r,t,9); + ITERATION(l,r,t,8); + ITERATION(l,r,t,7); + ITERATION(l,r,t,6); + ITERATION(l,r,t,5); + ITERATION(l,r,t,4); + ITERATION(l,r,t,3); + l ^= keystruct->p[2]; F(l,t); r^= t; //Last iteration has no swap() + r ^= keystruct->p[1]; + l ^= keystruct->p[0]; + + out[0] = l >> 24; + out[1] = l >> 16; + out[2] = l >> 8; + out[3] = l; + out[4] = r >> 24; + out[5] = r >> 16; + out[6] = r >> 8; + out[7] = r; +} + +void blowfish_key_setup(const BYTE user_key[], BLOWFISH_KEY *keystruct, size_t len) +{ + BYTE block[8]; + int idx,idx2; + + // Copy over the constant init array vals (so the originals aren't destroyed). + memcpy(keystruct->p,p_perm,sizeof(WORD) * 18); + memcpy(keystruct->s,s_perm,sizeof(WORD) * 1024); + + // Combine the key with the P box. Assume key is standard 448 bits (56 bytes) or less. + for (idx = 0, idx2 = 0; idx < 18; ++idx, idx2 += 4) + keystruct->p[idx] ^= (user_key[idx2 % len] << 24) | (user_key[(idx2+1) % len] << 16) + | (user_key[(idx2+2) % len] << 8) | (user_key[(idx2+3) % len]); + // Re-calculate the P box. + memset(block, 0, 8); + for (idx = 0; idx < 18; idx += 2) { + blowfish_encrypt(block,block,keystruct); + keystruct->p[idx] = (block[0] << 24) | (block[1] << 16) | (block[2] << 8) | block[3]; + keystruct->p[idx+1]=(block[4] << 24) | (block[5] << 16) | (block[6] << 8) | block[7]; + } + // Recalculate the S-boxes. + for (idx = 0; idx < 4; ++idx) { + for (idx2 = 0; idx2 < 256; idx2 += 2) { + blowfish_encrypt(block,block,keystruct); + keystruct->s[idx][idx2] = (block[0] << 24) | (block[1] << 16) | + (block[2] << 8) | block[3]; + keystruct->s[idx][idx2+1] = (block[4] << 24) | (block[5] << 16) | + (block[6] << 8) | block[7]; + } + } +} diff --git a/cores/saturn/deps/crypto/blowfish.h b/cores/saturn/deps/crypto/blowfish.h new file mode 100644 index 000000000..d8e9d4a6a --- /dev/null +++ b/cores/saturn/deps/crypto/blowfish.h @@ -0,0 +1,32 @@ +/********************************************************************* +* Filename: blowfish.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding Blowfish implementation. +*********************************************************************/ + +#ifndef BLOWFISH_H +#define BLOWFISH_H + +/*************************** HEADER FILES ***************************/ +#include + +/****************************** MACROS ******************************/ +#define BLOWFISH_BLOCK_SIZE 8 // Blowfish operates on 8 bytes at a time + +/**************************** DATA TYPES ****************************/ +typedef unsigned char BYTE; // 8-bit byte +typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines + +typedef struct { + WORD p[18]; + WORD s[4][256]; +} BLOWFISH_KEY; + +/*********************** FUNCTION DECLARATIONS **********************/ +void blowfish_key_setup(const BYTE user_key[], BLOWFISH_KEY *keystruct, size_t len); +void blowfish_encrypt(const BYTE in[], BYTE out[], const BLOWFISH_KEY *keystruct); +void blowfish_decrypt(const BYTE in[], BYTE out[], const BLOWFISH_KEY *keystruct); + +#endif // BLOWFISH_H diff --git a/cores/saturn/deps/crypto/crypto_types.h b/cores/saturn/deps/crypto/crypto_types.h new file mode 100644 index 000000000..17c3164ec --- /dev/null +++ b/cores/saturn/deps/crypto/crypto_types.h @@ -0,0 +1,15 @@ +/********************************************************************* +* Filename: crypto_types.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding AES implementation. +*********************************************************************/ + +#ifndef CRYPTO_TYPES_H +#define CRYPTO_TYPES_H + +typedef unsigned char BYTE; // 8-bit byte +typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines + +#endif diff --git a/cores/saturn/deps/crypto/des.c b/cores/saturn/deps/crypto/des.c new file mode 100644 index 000000000..1118eb311 --- /dev/null +++ b/cores/saturn/deps/crypto/des.c @@ -0,0 +1,269 @@ +/********************************************************************* +* Filename: des.c +* Author: Brad Conte (brad AT radconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Implementation of the DES encryption algorithm. + Modes of operation (such as CBC) are not included. + The formal NIST algorithm specification can be found here: + * http://csrc.nist.gov/publications/fips/fips46-3/fips46-3.pdf +*********************************************************************/ + +/*************************** HEADER FILES ***************************/ +#include +#include +#include "des.h" + +/****************************** MACROS ******************************/ +// Obtain bit "b" from the left and shift it "c" places from the right +#define BITNUM(a,b,c) (((a[(b)/8] >> (7 - (b%8))) & 0x01) << (c)) +#define BITNUMINTR(a,b,c) ((((a) >> (31 - (b))) & 0x00000001) << (c)) +#define BITNUMINTL(a,b,c) ((((a) << (b)) & 0x80000000) >> (c)) + +// This macro converts a 6 bit block with the S-Box row defined as the first and last +// bits to a 6 bit block with the row defined by the first two bits. +#define SBOXBIT(a) (((a) & 0x20) | (((a) & 0x1f) >> 1) | (((a) & 0x01) << 4)) + +/**************************** VARIABLES *****************************/ +static const BYTE sbox1[64] = { + 14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7, + 0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8, + 4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0, + 15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13 +}; +static const BYTE sbox2[64] = { + 15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10, + 3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5, + 0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15, + 13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9 +}; +static const BYTE sbox3[64] = { + 10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8, + 13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1, + 13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7, + 1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12 +}; +static const BYTE sbox4[64] = { + 7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15, + 13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9, + 10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4, + 3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14 +}; +static const BYTE sbox5[64] = { + 2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9, + 14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6, + 4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14, + 11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3 +}; +static const BYTE sbox6[64] = { + 12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11, + 10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8, + 9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6, + 4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13 +}; +static const BYTE sbox7[64] = { + 4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1, + 13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6, + 1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2, + 6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12 +}; +static const BYTE sbox8[64] = { + 13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7, + 1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2, + 7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8, + 2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11 +}; + +/*********************** FUNCTION DEFINITIONS ***********************/ +// Initial (Inv)Permutation step +void IP(WORD state[], const BYTE in[]) +{ + state[0] = BITNUM(in,57,31) | BITNUM(in,49,30) | BITNUM(in,41,29) | BITNUM(in,33,28) | + BITNUM(in,25,27) | BITNUM(in,17,26) | BITNUM(in,9,25) | BITNUM(in,1,24) | + BITNUM(in,59,23) | BITNUM(in,51,22) | BITNUM(in,43,21) | BITNUM(in,35,20) | + BITNUM(in,27,19) | BITNUM(in,19,18) | BITNUM(in,11,17) | BITNUM(in,3,16) | + BITNUM(in,61,15) | BITNUM(in,53,14) | BITNUM(in,45,13) | BITNUM(in,37,12) | + BITNUM(in,29,11) | BITNUM(in,21,10) | BITNUM(in,13,9) | BITNUM(in,5,8) | + BITNUM(in,63,7) | BITNUM(in,55,6) | BITNUM(in,47,5) | BITNUM(in,39,4) | + BITNUM(in,31,3) | BITNUM(in,23,2) | BITNUM(in,15,1) | BITNUM(in,7,0); + + state[1] = BITNUM(in,56,31) | BITNUM(in,48,30) | BITNUM(in,40,29) | BITNUM(in,32,28) | + BITNUM(in,24,27) | BITNUM(in,16,26) | BITNUM(in,8,25) | BITNUM(in,0,24) | + BITNUM(in,58,23) | BITNUM(in,50,22) | BITNUM(in,42,21) | BITNUM(in,34,20) | + BITNUM(in,26,19) | BITNUM(in,18,18) | BITNUM(in,10,17) | BITNUM(in,2,16) | + BITNUM(in,60,15) | BITNUM(in,52,14) | BITNUM(in,44,13) | BITNUM(in,36,12) | + BITNUM(in,28,11) | BITNUM(in,20,10) | BITNUM(in,12,9) | BITNUM(in,4,8) | + BITNUM(in,62,7) | BITNUM(in,54,6) | BITNUM(in,46,5) | BITNUM(in,38,4) | + BITNUM(in,30,3) | BITNUM(in,22,2) | BITNUM(in,14,1) | BITNUM(in,6,0); +} + +void InvIP(WORD state[], BYTE in[]) +{ + in[0] = BITNUMINTR(state[1],7,7) | BITNUMINTR(state[0],7,6) | BITNUMINTR(state[1],15,5) | + BITNUMINTR(state[0],15,4) | BITNUMINTR(state[1],23,3) | BITNUMINTR(state[0],23,2) | + BITNUMINTR(state[1],31,1) | BITNUMINTR(state[0],31,0); + + in[1] = BITNUMINTR(state[1],6,7) | BITNUMINTR(state[0],6,6) | BITNUMINTR(state[1],14,5) | + BITNUMINTR(state[0],14,4) | BITNUMINTR(state[1],22,3) | BITNUMINTR(state[0],22,2) | + BITNUMINTR(state[1],30,1) | BITNUMINTR(state[0],30,0); + + in[2] = BITNUMINTR(state[1],5,7) | BITNUMINTR(state[0],5,6) | BITNUMINTR(state[1],13,5) | + BITNUMINTR(state[0],13,4) | BITNUMINTR(state[1],21,3) | BITNUMINTR(state[0],21,2) | + BITNUMINTR(state[1],29,1) | BITNUMINTR(state[0],29,0); + + in[3] = BITNUMINTR(state[1],4,7) | BITNUMINTR(state[0],4,6) | BITNUMINTR(state[1],12,5) | + BITNUMINTR(state[0],12,4) | BITNUMINTR(state[1],20,3) | BITNUMINTR(state[0],20,2) | + BITNUMINTR(state[1],28,1) | BITNUMINTR(state[0],28,0); + + in[4] = BITNUMINTR(state[1],3,7) | BITNUMINTR(state[0],3,6) | BITNUMINTR(state[1],11,5) | + BITNUMINTR(state[0],11,4) | BITNUMINTR(state[1],19,3) | BITNUMINTR(state[0],19,2) | + BITNUMINTR(state[1],27,1) | BITNUMINTR(state[0],27,0); + + in[5] = BITNUMINTR(state[1],2,7) | BITNUMINTR(state[0],2,6) | BITNUMINTR(state[1],10,5) | + BITNUMINTR(state[0],10,4) | BITNUMINTR(state[1],18,3) | BITNUMINTR(state[0],18,2) | + BITNUMINTR(state[1],26,1) | BITNUMINTR(state[0],26,0); + + in[6] = BITNUMINTR(state[1],1,7) | BITNUMINTR(state[0],1,6) | BITNUMINTR(state[1],9,5) | + BITNUMINTR(state[0],9,4) | BITNUMINTR(state[1],17,3) | BITNUMINTR(state[0],17,2) | + BITNUMINTR(state[1],25,1) | BITNUMINTR(state[0],25,0); + + in[7] = BITNUMINTR(state[1],0,7) | BITNUMINTR(state[0],0,6) | BITNUMINTR(state[1],8,5) | + BITNUMINTR(state[0],8,4) | BITNUMINTR(state[1],16,3) | BITNUMINTR(state[0],16,2) | + BITNUMINTR(state[1],24,1) | BITNUMINTR(state[0],24,0); +} + +WORD f(WORD state, const BYTE key[]) +{ + BYTE lrgstate[6]; //,i; + WORD t1,t2; + + // Expantion Permutation + t1 = BITNUMINTL(state,31,0) | ((state & 0xf0000000) >> 1) | BITNUMINTL(state,4,5) | + BITNUMINTL(state,3,6) | ((state & 0x0f000000) >> 3) | BITNUMINTL(state,8,11) | + BITNUMINTL(state,7,12) | ((state & 0x00f00000) >> 5) | BITNUMINTL(state,12,17) | + BITNUMINTL(state,11,18) | ((state & 0x000f0000) >> 7) | BITNUMINTL(state,16,23); + + t2 = BITNUMINTL(state,15,0) | ((state & 0x0000f000) << 15) | BITNUMINTL(state,20,5) | + BITNUMINTL(state,19,6) | ((state & 0x00000f00) << 13) | BITNUMINTL(state,24,11) | + BITNUMINTL(state,23,12) | ((state & 0x000000f0) << 11) | BITNUMINTL(state,28,17) | + BITNUMINTL(state,27,18) | ((state & 0x0000000f) << 9) | BITNUMINTL(state,0,23); + + lrgstate[0] = (t1 >> 24) & 0x000000ff; + lrgstate[1] = (t1 >> 16) & 0x000000ff; + lrgstate[2] = (t1 >> 8) & 0x000000ff; + lrgstate[3] = (t2 >> 24) & 0x000000ff; + lrgstate[4] = (t2 >> 16) & 0x000000ff; + lrgstate[5] = (t2 >> 8) & 0x000000ff; + + // Key XOR + lrgstate[0] ^= key[0]; + lrgstate[1] ^= key[1]; + lrgstate[2] ^= key[2]; + lrgstate[3] ^= key[3]; + lrgstate[4] ^= key[4]; + lrgstate[5] ^= key[5]; + + // S-Box Permutation + state = (sbox1[SBOXBIT(lrgstate[0] >> 2)] << 28) | + (sbox2[SBOXBIT(((lrgstate[0] & 0x03) << 4) | (lrgstate[1] >> 4))] << 24) | + (sbox3[SBOXBIT(((lrgstate[1] & 0x0f) << 2) | (lrgstate[2] >> 6))] << 20) | + (sbox4[SBOXBIT(lrgstate[2] & 0x3f)] << 16) | + (sbox5[SBOXBIT(lrgstate[3] >> 2)] << 12) | + (sbox6[SBOXBIT(((lrgstate[3] & 0x03) << 4) | (lrgstate[4] >> 4))] << 8) | + (sbox7[SBOXBIT(((lrgstate[4] & 0x0f) << 2) | (lrgstate[5] >> 6))] << 4) | + sbox8[SBOXBIT(lrgstate[5] & 0x3f)]; + + // P-Box Permutation + state = BITNUMINTL(state,15,0) | BITNUMINTL(state,6,1) | BITNUMINTL(state,19,2) | + BITNUMINTL(state,20,3) | BITNUMINTL(state,28,4) | BITNUMINTL(state,11,5) | + BITNUMINTL(state,27,6) | BITNUMINTL(state,16,7) | BITNUMINTL(state,0,8) | + BITNUMINTL(state,14,9) | BITNUMINTL(state,22,10) | BITNUMINTL(state,25,11) | + BITNUMINTL(state,4,12) | BITNUMINTL(state,17,13) | BITNUMINTL(state,30,14) | + BITNUMINTL(state,9,15) | BITNUMINTL(state,1,16) | BITNUMINTL(state,7,17) | + BITNUMINTL(state,23,18) | BITNUMINTL(state,13,19) | BITNUMINTL(state,31,20) | + BITNUMINTL(state,26,21) | BITNUMINTL(state,2,22) | BITNUMINTL(state,8,23) | + BITNUMINTL(state,18,24) | BITNUMINTL(state,12,25) | BITNUMINTL(state,29,26) | + BITNUMINTL(state,5,27) | BITNUMINTL(state,21,28) | BITNUMINTL(state,10,29) | + BITNUMINTL(state,3,30) | BITNUMINTL(state,24,31); + + // Return the final state value + return(state); +} + +void des_key_setup(const BYTE key[], BYTE schedule[][6], DES_MODE mode) +{ + WORD i, j, to_gen, C, D; + const WORD key_rnd_shift[16] = {1,1,2,2,2,2,2,2,1,2,2,2,2,2,2,1}; + const WORD key_perm_c[28] = {56,48,40,32,24,16,8,0,57,49,41,33,25,17, + 9,1,58,50,42,34,26,18,10,2,59,51,43,35}; + const WORD key_perm_d[28] = {62,54,46,38,30,22,14,6,61,53,45,37,29,21, + 13,5,60,52,44,36,28,20,12,4,27,19,11,3}; + const WORD key_compression[48] = {13,16,10,23,0,4,2,27,14,5,20,9, + 22,18,11,3,25,7,15,6,26,19,12,1, + 40,51,30,36,46,54,29,39,50,44,32,47, + 43,48,38,55,33,52,45,41,49,35,28,31}; + + // Permutated Choice #1 (copy the key in, ignoring parity bits). + for (i = 0, j = 31, C = 0; i < 28; ++i, --j) + C |= BITNUM(key,key_perm_c[i],j); + for (i = 0, j = 31, D = 0; i < 28; ++i, --j) + D |= BITNUM(key,key_perm_d[i],j); + + // Generate the 16 subkeys. + for (i = 0; i < 16; ++i) { + C = ((C << key_rnd_shift[i]) | (C >> (28-key_rnd_shift[i]))) & 0xfffffff0; + D = ((D << key_rnd_shift[i]) | (D >> (28-key_rnd_shift[i]))) & 0xfffffff0; + + // Decryption subkeys are reverse order of encryption subkeys so + // generate them in reverse if the key schedule is for decryption useage. + if (mode == DES_DECRYPT) + to_gen = 15 - i; + else /*(if mode == DES_ENCRYPT)*/ + to_gen = i; + // Initialize the array + for (j = 0; j < 6; ++j) + schedule[to_gen][j] = 0; + for (j = 0; j < 24; ++j) + schedule[to_gen][j/8] |= BITNUMINTR(C,key_compression[j],7 - (j%8)); + for ( ; j < 48; ++j) + schedule[to_gen][j/8] |= BITNUMINTR(D,key_compression[j] - 28,7 - (j%8)); + } +} + +void des_crypt(const BYTE in[], BYTE out[], const BYTE key[][6]) +{ + WORD state[2],idx,t; + + IP(state,in); + + for (idx=0; idx < 15; ++idx) { + t = state[1]; + state[1] = f(state[1],key[idx]) ^ state[0]; + state[0] = t; + } + // Perform the final loop manually as it doesn't switch sides + state[0] = f(state[1],key[15]) ^ state[0]; + + InvIP(state,out); +} + +void three_des_key_setup(const BYTE key[], BYTE schedule[][16][6], DES_MODE mode) +{ + if (mode == DES_ENCRYPT) { + des_key_setup(&key[0],schedule[0],mode); + des_key_setup(&key[8],schedule[1],!mode); + des_key_setup(&key[16],schedule[2],mode); + } + else /*if (mode == DES_DECRYPT*/ { + des_key_setup(&key[16],schedule[0],mode); + des_key_setup(&key[8],schedule[1],!mode); + des_key_setup(&key[0],schedule[2],mode); + } +} + +void three_des_crypt(const BYTE in[], BYTE out[], const BYTE key[][16][6]) +{ + des_crypt(in,out,key[0]); + des_crypt(out,out,key[1]); + des_crypt(out,out,key[2]); +} diff --git a/cores/saturn/deps/crypto/des.h b/cores/saturn/deps/crypto/des.h new file mode 100644 index 000000000..1503772a2 --- /dev/null +++ b/cores/saturn/deps/crypto/des.h @@ -0,0 +1,37 @@ +/********************************************************************* +* Filename: des.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding DES implementation. + Note that encryption and decryption are defined by how + the key setup is performed, the actual en/de-cryption is + performed by the same function. +*********************************************************************/ + +#ifndef DES_H +#define DESH + +/*************************** HEADER FILES ***************************/ +#include + +/****************************** MACROS ******************************/ +#define DES_BLOCK_SIZE 8 // DES operates on 8 bytes at a time + +/**************************** DATA TYPES ****************************/ +typedef unsigned char BYTE; // 8-bit byte +typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines + +typedef enum { + DES_ENCRYPT, + DES_DECRYPT +} DES_MODE; + +/*********************** FUNCTION DECLARATIONS **********************/ +void des_key_setup(const BYTE key[], BYTE schedule[][6], DES_MODE mode); +void des_crypt(const BYTE in[], BYTE out[], const BYTE key[][6]); + +void three_des_key_setup(const BYTE key[], BYTE schedule[][16][6], DES_MODE mode); +void three_des_crypt(const BYTE in[], BYTE out[], const BYTE key[][16][6]); + +#endif // DES_H diff --git a/cores/saturn/deps/crypto/md2.c b/cores/saturn/deps/crypto/md2.c new file mode 100644 index 000000000..e66d6671c --- /dev/null +++ b/cores/saturn/deps/crypto/md2.c @@ -0,0 +1,104 @@ +/********************************************************************* +* Filename: md2.c +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Implementation of the MD2 hashing algorithm. + Algorithm specification can be found here: + * http://tools.ietf.org/html/rfc1319 . + Input is little endian byte order. +*********************************************************************/ + +/*************************** HEADER FILES ***************************/ +#include +#include +#include "md2.h" + +/**************************** VARIABLES *****************************/ +static const BYTE s[256] = { + 41, 46, 67, 201, 162, 216, 124, 1, 61, 54, 84, 161, 236, 240, 6, + 19, 98, 167, 5, 243, 192, 199, 115, 140, 152, 147, 43, 217, 188, + 76, 130, 202, 30, 155, 87, 60, 253, 212, 224, 22, 103, 66, 111, 24, + 138, 23, 229, 18, 190, 78, 196, 214, 218, 158, 222, 73, 160, 251, + 245, 142, 187, 47, 238, 122, 169, 104, 121, 145, 21, 178, 7, 63, + 148, 194, 16, 137, 11, 34, 95, 33, 128, 127, 93, 154, 90, 144, 50, + 39, 53, 62, 204, 231, 191, 247, 151, 3, 255, 25, 48, 179, 72, 165, + 181, 209, 215, 94, 146, 42, 172, 86, 170, 198, 79, 184, 56, 210, + 150, 164, 125, 182, 118, 252, 107, 226, 156, 116, 4, 241, 69, 157, + 112, 89, 100, 113, 135, 32, 134, 91, 207, 101, 230, 45, 168, 2, 27, + 96, 37, 173, 174, 176, 185, 246, 28, 70, 97, 105, 52, 64, 126, 15, + 85, 71, 163, 35, 221, 81, 175, 58, 195, 92, 249, 206, 186, 197, + 234, 38, 44, 83, 13, 110, 133, 40, 132, 9, 211, 223, 205, 244, 65, + 129, 77, 82, 106, 220, 55, 200, 108, 193, 171, 250, 36, 225, 123, + 8, 12, 189, 177, 74, 120, 136, 149, 139, 227, 99, 232, 109, 233, + 203, 213, 254, 59, 0, 29, 57, 242, 239, 183, 14, 102, 88, 208, 228, + 166, 119, 114, 248, 235, 117, 75, 10, 49, 68, 80, 180, 143, 237, + 31, 26, 219, 153, 141, 51, 159, 17, 131, 20 +}; + +/*********************** FUNCTION DEFINITIONS ***********************/ +void md2_transform(MD2_CTX *ctx, BYTE data[]) +{ + int j,k,t; + + //memcpy(&ctx->state[16], data); + for (j=0; j < 16; ++j) { + ctx->state[j + 16] = data[j]; + ctx->state[j + 32] = (ctx->state[j+16] ^ ctx->state[j]); + } + + t = 0; + for (j = 0; j < 18; ++j) { + for (k = 0; k < 48; ++k) { + ctx->state[k] ^= s[t]; + t = ctx->state[k]; + } + t = (t+j) & 0xFF; + } + + t = ctx->checksum[15]; + for (j=0; j < 16; ++j) { + ctx->checksum[j] ^= s[data[j] ^ t]; + t = ctx->checksum[j]; + } +} + +void md2_init(MD2_CTX *ctx) +{ + int i; + + for (i=0; i < 48; ++i) + ctx->state[i] = 0; + for (i=0; i < 16; ++i) + ctx->checksum[i] = 0; + ctx->len = 0; +} + +void md2_update(MD2_CTX *ctx, const BYTE data[], size_t len) +{ + size_t i; + + for (i = 0; i < len; ++i) { + ctx->data[ctx->len] = data[i]; + ctx->len++; + if (ctx->len == MD2_BLOCK_SIZE) { + md2_transform(ctx, ctx->data); + ctx->len = 0; + } + } +} + +void md2_final(MD2_CTX *ctx, BYTE hash[]) +{ + int to_pad; + + to_pad = MD2_BLOCK_SIZE - ctx->len; + + while (ctx->len < MD2_BLOCK_SIZE) + ctx->data[ctx->len++] = to_pad; + + md2_transform(ctx, ctx->data); + md2_transform(ctx, ctx->checksum); + + memcpy(hash, ctx->state, MD2_BLOCK_SIZE); +} diff --git a/cores/saturn/deps/crypto/md2.h b/cores/saturn/deps/crypto/md2.h new file mode 100644 index 000000000..97706af1e --- /dev/null +++ b/cores/saturn/deps/crypto/md2.h @@ -0,0 +1,33 @@ +/********************************************************************* +* Filename: md2.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding MD2 implementation. +*********************************************************************/ + +#ifndef MD2_H +#define MD2_H + +/*************************** HEADER FILES ***************************/ +#include + +/****************************** MACROS ******************************/ +#define MD2_BLOCK_SIZE 16 + +/**************************** DATA TYPES ****************************/ +typedef unsigned char BYTE; // 8-bit byte + +typedef struct { + BYTE data[16]; + BYTE state[48]; + BYTE checksum[16]; + int len; +} MD2_CTX; + +/*********************** FUNCTION DECLARATIONS **********************/ +void md2_init(MD2_CTX *ctx); +void md2_update(MD2_CTX *ctx, const BYTE data[], size_t len); +void md2_final(MD2_CTX *ctx, BYTE hash[]); // size of hash must be MD2_BLOCK_SIZE + +#endif // MD2_H diff --git a/cores/saturn/deps/crypto/md5.c b/cores/saturn/deps/crypto/md5.c new file mode 100644 index 000000000..769199ed1 --- /dev/null +++ b/cores/saturn/deps/crypto/md5.c @@ -0,0 +1,189 @@ +/********************************************************************* +* Filename: md5.c +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Implementation of the MD5 hashing algorithm. + Algorithm specification can be found here: + * http://tools.ietf.org/html/rfc1321 + This implementation uses little endian byte order. +*********************************************************************/ + +/*************************** HEADER FILES ***************************/ +#include +#include +#include "md5.h" + +/****************************** MACROS ******************************/ +#define ROTLEFT(a,b) ((a << b) | (a >> (32-b))) + +#define F(x,y,z) ((x & y) | (~x & z)) +#define G(x,y,z) ((x & z) | (y & ~z)) +#define H(x,y,z) (x ^ y ^ z) +#define I(x,y,z) (y ^ (x | ~z)) + +#define FF(a,b,c,d,m,s,t) { a += F(b,c,d) + m + t; \ + a = b + ROTLEFT(a,s); } +#define GG(a,b,c,d,m,s,t) { a += G(b,c,d) + m + t; \ + a = b + ROTLEFT(a,s); } +#define HH(a,b,c,d,m,s,t) { a += H(b,c,d) + m + t; \ + a = b + ROTLEFT(a,s); } +#define II(a,b,c,d,m,s,t) { a += I(b,c,d) + m + t; \ + a = b + ROTLEFT(a,s); } + +/*********************** FUNCTION DEFINITIONS ***********************/ +void md5_transform(MD5_CTX *ctx, const BYTE data[]) +{ + WORD a, b, c, d, m[16], i, j; + + // MD5 specifies big endian byte order, but this implementation assumes a little + // endian byte order CPU. Reverse all the bytes upon input, and re-reverse them + // on output (in md5_final()). + for (i = 0, j = 0; i < 16; ++i, j += 4) + m[i] = (data[j]) + (data[j + 1] << 8) + (data[j + 2] << 16) + (data[j + 3] << 24); + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + + FF(a,b,c,d,m[0], 7,0xd76aa478); + FF(d,a,b,c,m[1], 12,0xe8c7b756); + FF(c,d,a,b,m[2], 17,0x242070db); + FF(b,c,d,a,m[3], 22,0xc1bdceee); + FF(a,b,c,d,m[4], 7,0xf57c0faf); + FF(d,a,b,c,m[5], 12,0x4787c62a); + FF(c,d,a,b,m[6], 17,0xa8304613); + FF(b,c,d,a,m[7], 22,0xfd469501); + FF(a,b,c,d,m[8], 7,0x698098d8); + FF(d,a,b,c,m[9], 12,0x8b44f7af); + FF(c,d,a,b,m[10],17,0xffff5bb1); + FF(b,c,d,a,m[11],22,0x895cd7be); + FF(a,b,c,d,m[12], 7,0x6b901122); + FF(d,a,b,c,m[13],12,0xfd987193); + FF(c,d,a,b,m[14],17,0xa679438e); + FF(b,c,d,a,m[15],22,0x49b40821); + + GG(a,b,c,d,m[1], 5,0xf61e2562); + GG(d,a,b,c,m[6], 9,0xc040b340); + GG(c,d,a,b,m[11],14,0x265e5a51); + GG(b,c,d,a,m[0], 20,0xe9b6c7aa); + GG(a,b,c,d,m[5], 5,0xd62f105d); + GG(d,a,b,c,m[10], 9,0x02441453); + GG(c,d,a,b,m[15],14,0xd8a1e681); + GG(b,c,d,a,m[4], 20,0xe7d3fbc8); + GG(a,b,c,d,m[9], 5,0x21e1cde6); + GG(d,a,b,c,m[14], 9,0xc33707d6); + GG(c,d,a,b,m[3], 14,0xf4d50d87); + GG(b,c,d,a,m[8], 20,0x455a14ed); + GG(a,b,c,d,m[13], 5,0xa9e3e905); + GG(d,a,b,c,m[2], 9,0xfcefa3f8); + GG(c,d,a,b,m[7], 14,0x676f02d9); + GG(b,c,d,a,m[12],20,0x8d2a4c8a); + + HH(a,b,c,d,m[5], 4,0xfffa3942); + HH(d,a,b,c,m[8], 11,0x8771f681); + HH(c,d,a,b,m[11],16,0x6d9d6122); + HH(b,c,d,a,m[14],23,0xfde5380c); + HH(a,b,c,d,m[1], 4,0xa4beea44); + HH(d,a,b,c,m[4], 11,0x4bdecfa9); + HH(c,d,a,b,m[7], 16,0xf6bb4b60); + HH(b,c,d,a,m[10],23,0xbebfbc70); + HH(a,b,c,d,m[13], 4,0x289b7ec6); + HH(d,a,b,c,m[0], 11,0xeaa127fa); + HH(c,d,a,b,m[3], 16,0xd4ef3085); + HH(b,c,d,a,m[6], 23,0x04881d05); + HH(a,b,c,d,m[9], 4,0xd9d4d039); + HH(d,a,b,c,m[12],11,0xe6db99e5); + HH(c,d,a,b,m[15],16,0x1fa27cf8); + HH(b,c,d,a,m[2], 23,0xc4ac5665); + + II(a,b,c,d,m[0], 6,0xf4292244); + II(d,a,b,c,m[7], 10,0x432aff97); + II(c,d,a,b,m[14],15,0xab9423a7); + II(b,c,d,a,m[5], 21,0xfc93a039); + II(a,b,c,d,m[12], 6,0x655b59c3); + II(d,a,b,c,m[3], 10,0x8f0ccc92); + II(c,d,a,b,m[10],15,0xffeff47d); + II(b,c,d,a,m[1], 21,0x85845dd1); + II(a,b,c,d,m[8], 6,0x6fa87e4f); + II(d,a,b,c,m[15],10,0xfe2ce6e0); + II(c,d,a,b,m[6], 15,0xa3014314); + II(b,c,d,a,m[13],21,0x4e0811a1); + II(a,b,c,d,m[4], 6,0xf7537e82); + II(d,a,b,c,m[11],10,0xbd3af235); + II(c,d,a,b,m[2], 15,0x2ad7d2bb); + II(b,c,d,a,m[9], 21,0xeb86d391); + + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; +} + +void md5_init(MD5_CTX *ctx) +{ + ctx->datalen = 0; + ctx->bitlen = 0; + ctx->state[0] = 0x67452301; + ctx->state[1] = 0xEFCDAB89; + ctx->state[2] = 0x98BADCFE; + ctx->state[3] = 0x10325476; +} + +void md5_update(MD5_CTX *ctx, const BYTE data[], size_t len) +{ + size_t i; + + for (i = 0; i < len; ++i) { + ctx->data[ctx->datalen] = data[i]; + ctx->datalen++; + if (ctx->datalen == 64) { + md5_transform(ctx, ctx->data); + ctx->bitlen += 512; + ctx->datalen = 0; + } + } +} + +void md5_final(MD5_CTX *ctx, BYTE hash[]) +{ + size_t i; + + i = ctx->datalen; + + // Pad whatever data is left in the buffer. + if (ctx->datalen < 56) { + ctx->data[i++] = 0x80; + while (i < 56) + ctx->data[i++] = 0x00; + } + else if (ctx->datalen >= 56) { + ctx->data[i++] = 0x80; + while (i < 64) + ctx->data[i++] = 0x00; + md5_transform(ctx, ctx->data); + memset(ctx->data, 0, 56); + } + + // Append to the padding the total message's length in bits and transform. + ctx->bitlen += ctx->datalen * 8; + ctx->data[56] = ctx->bitlen; + ctx->data[57] = ctx->bitlen >> 8; + ctx->data[58] = ctx->bitlen >> 16; + ctx->data[59] = ctx->bitlen >> 24; + ctx->data[60] = ctx->bitlen >> 32; + ctx->data[61] = ctx->bitlen >> 40; + ctx->data[62] = ctx->bitlen >> 48; + ctx->data[63] = ctx->bitlen >> 56; + md5_transform(ctx, ctx->data); + + // Since this implementation uses little endian byte ordering and MD uses big endian, + // reverse all the bytes when copying the final state to the output hash. + for (i = 0; i < 4; ++i) { + hash[i] = (ctx->state[0] >> (i * 8)) & 0x000000ff; + hash[i + 4] = (ctx->state[1] >> (i * 8)) & 0x000000ff; + hash[i + 8] = (ctx->state[2] >> (i * 8)) & 0x000000ff; + hash[i + 12] = (ctx->state[3] >> (i * 8)) & 0x000000ff; + } +} diff --git a/cores/saturn/deps/crypto/md5.h b/cores/saturn/deps/crypto/md5.h new file mode 100644 index 000000000..e5b44906e --- /dev/null +++ b/cores/saturn/deps/crypto/md5.h @@ -0,0 +1,33 @@ +/********************************************************************* +* Filename: md5.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding MD5 implementation. +*********************************************************************/ + +#ifndef MD5_H +#define MD5_H + +/*************************** HEADER FILES ***************************/ +#include +#include "crypto_types.h" + +/****************************** MACROS ******************************/ +#define MD5_BLOCK_SIZE 16 // MD5 outputs a 16 byte digest + +/**************************** DATA TYPES ****************************/ + +typedef struct { + BYTE data[64]; + WORD datalen; + unsigned long long bitlen; + WORD state[4]; +} MD5_CTX; + +/*********************** FUNCTION DECLARATIONS **********************/ +void md5_init(MD5_CTX *ctx); +void md5_update(MD5_CTX *ctx, const BYTE data[], size_t len); +void md5_final(MD5_CTX *ctx, BYTE hash[]); + +#endif // MD5_H diff --git a/cores/saturn/deps/crypto/rot-13.c b/cores/saturn/deps/crypto/rot-13.c new file mode 100644 index 000000000..0ab84975e --- /dev/null +++ b/cores/saturn/deps/crypto/rot-13.c @@ -0,0 +1,35 @@ +/********************************************************************* +* Filename: rot-13.c +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Implementation of the ROT-13 encryption algorithm. + Algorithm specification can be found here: + * + This implementation uses little endian byte order. +*********************************************************************/ + +/*************************** HEADER FILES ***************************/ +#include +#include "rot-13.h" + +/*********************** FUNCTION DEFINITIONS ***********************/ +void rot13(char str[]) +{ + int case_type, idx, len; + + for (idx = 0, len = strlen(str); idx < len; idx++) { + // Only process alphabetic characters. + if (str[idx] < 'A' || (str[idx] > 'Z' && str[idx] < 'a') || str[idx] > 'z') + continue; + // Determine if the char is upper or lower case. + if (str[idx] >= 'a') + case_type = 'a'; + else + case_type = 'A'; + // Rotate the char's value, ensuring it doesn't accidentally "fall off" the end. + str[idx] = (str[idx] + 13) % (case_type + 26); + if (str[idx] < 26) + str[idx] += case_type; + } +} diff --git a/cores/saturn/deps/crypto/rot-13.h b/cores/saturn/deps/crypto/rot-13.h new file mode 100644 index 000000000..4c581c39a --- /dev/null +++ b/cores/saturn/deps/crypto/rot-13.h @@ -0,0 +1,20 @@ +/********************************************************************* +* Filename: rot-13.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding ROT-13 implementation. +*********************************************************************/ + +#ifndef ROT13_H +#define ROT13_H + +/*************************** HEADER FILES ***************************/ +#include + +/*********************** FUNCTION DECLARATIONS **********************/ +// Performs IN PLACE rotation of the input. Assumes input is NULL terminated. +// Preserves each charcter's case. Ignores non alphabetic characters. +void rot13(char str[]); + +#endif // ROT13_H diff --git a/cores/saturn/deps/crypto/sha1.c b/cores/saturn/deps/crypto/sha1.c new file mode 100644 index 000000000..eb0bda038 --- /dev/null +++ b/cores/saturn/deps/crypto/sha1.c @@ -0,0 +1,149 @@ +/********************************************************************* +* Filename: sha1.c +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Implementation of the SHA1 hashing algorithm. + Algorithm specification can be found here: + * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf + This implementation uses little endian byte order. +*********************************************************************/ + +/*************************** HEADER FILES ***************************/ +#include +#include +#include "sha1.h" + +/****************************** MACROS ******************************/ +#define ROTLEFT(a, b) ((a << b) | (a >> (32 - b))) + +/*********************** FUNCTION DEFINITIONS ***********************/ +void sha1_transform(SHA1_CTX *ctx, const BYTE data[]) +{ + WORD a, b, c, d, e, i, j, t, m[80]; + + for (i = 0, j = 0; i < 16; ++i, j += 4) + m[i] = (data[j] << 24) + (data[j + 1] << 16) + (data[j + 2] << 8) + (data[j + 3]); + for ( ; i < 80; ++i) { + m[i] = (m[i - 3] ^ m[i - 8] ^ m[i - 14] ^ m[i - 16]); + m[i] = (m[i] << 1) | (m[i] >> 31); + } + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + + for (i = 0; i < 20; ++i) { + t = ROTLEFT(a, 5) + ((b & c) ^ (~b & d)) + e + ctx->k[0] + m[i]; + e = d; + d = c; + c = ROTLEFT(b, 30); + b = a; + a = t; + } + for ( ; i < 40; ++i) { + t = ROTLEFT(a, 5) + (b ^ c ^ d) + e + ctx->k[1] + m[i]; + e = d; + d = c; + c = ROTLEFT(b, 30); + b = a; + a = t; + } + for ( ; i < 60; ++i) { + t = ROTLEFT(a, 5) + ((b & c) ^ (b & d) ^ (c & d)) + e + ctx->k[2] + m[i]; + e = d; + d = c; + c = ROTLEFT(b, 30); + b = a; + a = t; + } + for ( ; i < 80; ++i) { + t = ROTLEFT(a, 5) + (b ^ c ^ d) + e + ctx->k[3] + m[i]; + e = d; + d = c; + c = ROTLEFT(b, 30); + b = a; + a = t; + } + + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; + ctx->state[4] += e; +} + +void sha1_init(SHA1_CTX *ctx) +{ + ctx->datalen = 0; + ctx->bitlen = 0; + ctx->state[0] = 0x67452301; + ctx->state[1] = 0xEFCDAB89; + ctx->state[2] = 0x98BADCFE; + ctx->state[3] = 0x10325476; + ctx->state[4] = 0xc3d2e1f0; + ctx->k[0] = 0x5a827999; + ctx->k[1] = 0x6ed9eba1; + ctx->k[2] = 0x8f1bbcdc; + ctx->k[3] = 0xca62c1d6; +} + +void sha1_update(SHA1_CTX *ctx, const BYTE data[], size_t len) +{ + size_t i; + + for (i = 0; i < len; ++i) { + ctx->data[ctx->datalen] = data[i]; + ctx->datalen++; + if (ctx->datalen == 64) { + sha1_transform(ctx, ctx->data); + ctx->bitlen += 512; + ctx->datalen = 0; + } + } +} + +void sha1_final(SHA1_CTX *ctx, BYTE hash[]) +{ + WORD i; + + i = ctx->datalen; + + // Pad whatever data is left in the buffer. + if (ctx->datalen < 56) { + ctx->data[i++] = 0x80; + while (i < 56) + ctx->data[i++] = 0x00; + } + else { + ctx->data[i++] = 0x80; + while (i < 64) + ctx->data[i++] = 0x00; + sha1_transform(ctx, ctx->data); + memset(ctx->data, 0, 56); + } + + // Append to the padding the total message's length in bits and transform. + ctx->bitlen += ctx->datalen * 8; + ctx->data[63] = ctx->bitlen; + ctx->data[62] = ctx->bitlen >> 8; + ctx->data[61] = ctx->bitlen >> 16; + ctx->data[60] = ctx->bitlen >> 24; + ctx->data[59] = ctx->bitlen >> 32; + ctx->data[58] = ctx->bitlen >> 40; + ctx->data[57] = ctx->bitlen >> 48; + ctx->data[56] = ctx->bitlen >> 56; + sha1_transform(ctx, ctx->data); + + // Since this implementation uses little endian byte ordering and MD uses big endian, + // reverse all the bytes when copying the final state to the output hash. + for (i = 0; i < 4; ++i) { + hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff; + hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff; + hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff; + hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff; + hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff; + } +} diff --git a/cores/saturn/deps/crypto/sha1.h b/cores/saturn/deps/crypto/sha1.h new file mode 100644 index 000000000..e4c786930 --- /dev/null +++ b/cores/saturn/deps/crypto/sha1.h @@ -0,0 +1,34 @@ +/********************************************************************* +* Filename: sha1.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding SHA1 implementation. +*********************************************************************/ + +#ifndef SHA1_H +#define SHA1_H + +/*************************** HEADER FILES ***************************/ +#include + +#include "crypto_types.h" + +/****************************** MACROS ******************************/ +#define SHA1_BLOCK_SIZE 20 // SHA1 outputs a 20 byte digest + +/**************************** DATA TYPES ****************************/ +typedef struct { + BYTE data[64]; + WORD datalen; + unsigned long long bitlen; + WORD state[5]; + WORD k[4]; +} SHA1_CTX; + +/*********************** FUNCTION DECLARATIONS **********************/ +void sha1_init(SHA1_CTX *ctx); +void sha1_update(SHA1_CTX *ctx, const BYTE data[], size_t len); +void sha1_final(SHA1_CTX *ctx, BYTE hash[]); + +#endif // SHA1_H diff --git a/cores/saturn/deps/crypto/sha256.c b/cores/saturn/deps/crypto/sha256.c new file mode 100644 index 000000000..d644d2d4e --- /dev/null +++ b/cores/saturn/deps/crypto/sha256.c @@ -0,0 +1,158 @@ +/********************************************************************* +* Filename: sha256.c +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Implementation of the SHA-256 hashing algorithm. + SHA-256 is one of the three algorithms in the SHA2 + specification. The others, SHA-384 and SHA-512, are not + offered in this implementation. + Algorithm specification can be found here: + * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf + This implementation uses little endian byte order. +*********************************************************************/ + +/*************************** HEADER FILES ***************************/ +#include +#include +#include "sha256.h" + +/****************************** MACROS ******************************/ +#define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b)))) +#define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b)))) + +#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z))) +#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22)) +#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25)) +#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3)) +#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10)) + +/**************************** VARIABLES *****************************/ +static const WORD k[64] = { + 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5, + 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174, + 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da, + 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967, + 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85, + 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070, + 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3, + 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +}; + +/*********************** FUNCTION DEFINITIONS ***********************/ +void sha256_transform(SHA256_CTX *ctx, const BYTE data[]) +{ + WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64]; + + for (i = 0, j = 0; i < 16; ++i, j += 4) + m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]); + for ( ; i < 64; ++i) + m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16]; + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + f = ctx->state[5]; + g = ctx->state[6]; + h = ctx->state[7]; + + for (i = 0; i < 64; ++i) { + t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i]; + t2 = EP0(a) + MAJ(a,b,c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + } + + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; + ctx->state[4] += e; + ctx->state[5] += f; + ctx->state[6] += g; + ctx->state[7] += h; +} + +void sha256_init(SHA256_CTX *ctx) +{ + ctx->datalen = 0; + ctx->bitlen = 0; + ctx->state[0] = 0x6a09e667; + ctx->state[1] = 0xbb67ae85; + ctx->state[2] = 0x3c6ef372; + ctx->state[3] = 0xa54ff53a; + ctx->state[4] = 0x510e527f; + ctx->state[5] = 0x9b05688c; + ctx->state[6] = 0x1f83d9ab; + ctx->state[7] = 0x5be0cd19; +} + +void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len) +{ + WORD i; + + for (i = 0; i < len; ++i) { + ctx->data[ctx->datalen] = data[i]; + ctx->datalen++; + if (ctx->datalen == 64) { + sha256_transform(ctx, ctx->data); + ctx->bitlen += 512; + ctx->datalen = 0; + } + } +} + +void sha256_final(SHA256_CTX *ctx, BYTE hash[]) +{ + WORD i; + + i = ctx->datalen; + + // Pad whatever data is left in the buffer. + if (ctx->datalen < 56) { + ctx->data[i++] = 0x80; + while (i < 56) + ctx->data[i++] = 0x00; + } + else { + ctx->data[i++] = 0x80; + while (i < 64) + ctx->data[i++] = 0x00; + sha256_transform(ctx, ctx->data); + memset(ctx->data, 0, 56); + } + + // Append to the padding the total message's length in bits and transform. + ctx->bitlen += ctx->datalen * 8; + ctx->data[63] = ctx->bitlen; + ctx->data[62] = ctx->bitlen >> 8; + ctx->data[61] = ctx->bitlen >> 16; + ctx->data[60] = ctx->bitlen >> 24; + ctx->data[59] = ctx->bitlen >> 32; + ctx->data[58] = ctx->bitlen >> 40; + ctx->data[57] = ctx->bitlen >> 48; + ctx->data[56] = ctx->bitlen >> 56; + sha256_transform(ctx, ctx->data); + + // Since this implementation uses little endian byte ordering and SHA uses big endian, + // reverse all the bytes when copying the final state to the output hash. + for (i = 0; i < 4; ++i) { + hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff; + hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff; + hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff; + hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff; + hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff; + hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff; + hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff; + hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff; + } +} diff --git a/cores/saturn/deps/crypto/sha256.h b/cores/saturn/deps/crypto/sha256.h new file mode 100644 index 000000000..7123a30dd --- /dev/null +++ b/cores/saturn/deps/crypto/sha256.h @@ -0,0 +1,34 @@ +/********************************************************************* +* Filename: sha256.h +* Author: Brad Conte (brad AT bradconte.com) +* Copyright: +* Disclaimer: This code is presented "as is" without any guarantees. +* Details: Defines the API for the corresponding SHA1 implementation. +*********************************************************************/ + +#ifndef SHA256_H +#define SHA256_H + +/*************************** HEADER FILES ***************************/ +#include + +/****************************** MACROS ******************************/ +#define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest + +/**************************** DATA TYPES ****************************/ +typedef unsigned char BYTE; // 8-bit byte +typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines + +typedef struct { + BYTE data[64]; + WORD datalen; + unsigned long long bitlen; + WORD state[8]; +} SHA256_CTX; + +/*********************** FUNCTION DECLARATIONS **********************/ +void sha256_init(SHA256_CTX *ctx); +void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len); +void sha256_final(SHA256_CTX *ctx, BYTE hash[]); + +#endif // SHA256_H diff --git a/cores/saturn/deps/libchdr/CMakeLists.txt b/cores/saturn/deps/libchdr/CMakeLists.txt new file mode 100644 index 000000000..5b62d36cc --- /dev/null +++ b/cores/saturn/deps/libchdr/CMakeLists.txt @@ -0,0 +1,123 @@ +cmake_minimum_required(VERSION 3.1) + +project(chdr C) + +set(CHDR_VERSION_MAJOR 0) +set(CHDR_VERSION_MINOR 1) + +option(BUILD_SHARED_LIBS "Build libchdr also as a shared library" ON) +option(WITH_SYSTEM_ZLIB "Use system provided zlib library" OFF) + +if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden -fPIC -O3 -flto") +endif() + +include(FindPkgConfig) + +if (NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "/usr/local") +endif() +if (NOT DEFINED CMAKE_INSTALL_LIBDIR) + set(CMAKE_INSTALL_LIBDIR "lib") +endif() +if (NOT DEFINED CMAKE_INSTALL_INCLUDEDIR) + set(CMAKE_INSTALL_INCLUDEDIR "include") +endif() + +# Detect processor type. +if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64") + set(CPU_ARCH "x64") +elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") + # MSVC x86/x64 + if(CMAKE_SIZEOF_VOID_P EQUAL 8) + set(CPU_ARCH "x64") + else() + set(CPU_ARCH "x86") + endif() +elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386" OR + ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686") + set(CPU_ARCH "x86") +elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") + set(CPU_ARCH "aarch64") +elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7-a") + set(CPU_ARCH "arm") +else() + message(FATAL_ERROR "Unknown system processor: " ${CMAKE_SYSTEM_PROCESSOR}) +endif() + +#-------------------------------------------------- +# dependencies +#-------------------------------------------------- + + +# lzma +add_subdirectory(deps/lzma-19.00 EXCLUDE_FROM_ALL) + list(APPEND CHDR_LIBS lzma) + list(APPEND CHDR_INCLUDES lzma) + +# zlib +if (WITH_SYSTEM_ZLIB) + pkg_check_modules(ZLIB REQUIRED zlib) + list(APPEND PLATFORM_INCLUDES ${ZLIB_INCLUDE_DIRS}) + list(APPEND PLATFORM_LIBS ${ZLIB_LIBRARIES}) +else() + add_subdirectory(deps/zlib-1.2.11 EXCLUDE_FROM_ALL) + list(APPEND CHDR_LIBS zlib) + list(APPEND CHDR_INCLUDES zlib) +endif() + +#-------------------------------------------------- +# chdr +#-------------------------------------------------- + +set(CHDR_SOURCES + src/libchdr_bitstream.c + src/libchdr_cdrom.c + src/libchdr_chd.c + src/libchdr_flac.c + src/libchdr_huffman.c +) + +list(APPEND CHDR_INCLUDES include ${CMAKE_CURRENT_BINARY_DIR}/include) + +add_library(chdr-static STATIC ${CHDR_SOURCES}) +target_include_directories(chdr-static PRIVATE ${CHDR_INCLUDES} ${PLATFORM_INCLUDES}) +target_compile_definitions(chdr-static PRIVATE ${CHDR_DEFS}) +target_link_libraries(chdr-static ${CHDR_LIBS} ${PLATFORM_LIBS}) + +if (BUILD_SHARED_LIBS) + set(LIBS ${PLATFORM_LIBS}) + list(TRANSFORM LIBS PREPEND "-l") + list(JOIN LIBS " " LIBS) + set(CMAKE_CXX_VISIBILITY_PRESET hidden) + set(CMAKE_VISIBILITY_INLINES_HIDDEN 1) + + add_library(chdr SHARED ${CHDR_SOURCES}) + target_include_directories(chdr PRIVATE ${CHDR_INCLUDES} ${PLATFORM_INCLUDES}) + target_compile_definitions(chdr PRIVATE ${CHDR_DEFS}) + target_link_libraries(chdr ${CHDR_LIBS} ${PLATFORM_LIBS}) + + if(MSVC) + target_compile_definitions(chdr PUBLIC "CHD_DLL") + target_compile_definitions(chdr PRIVATE "CHD_DLL_EXPORTS") + else() + target_link_options(chdr PRIVATE -Wl,--version-script ${CMAKE_SOURCE_DIR}/src/link.T -Wl,--no-undefined) + endif() + + set_target_properties(chdr PROPERTIES PUBLIC_HEADER "include/libchdr/bitstream.h;include/libchdr/cdrom.h;include/libchdr/chd.h;include/libchdr/coretypes.h;include/libchdr/flac.h;include/libchdr/huffman.h") + set_target_properties(chdr PROPERTIES VERSION "${CHDR_VERSION_MAJOR}.${CHDR_VERSION_MINOR}") + + if (CMAKE_BUILD_TYPE MATCHES Release) + #add_custom_command(TARGET chdr POST_BUILD COMMAND ${CMAKE_STRIP} libchdr.so) + endif (CMAKE_BUILD_TYPE MATCHES Release) + + install(TARGETS chdr + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libchdr" + ) + + configure_file(pkg-config.pc.in ${CMAKE_BINARY_DIR}/libchdr.pc @ONLY) + install(FILES ${CMAKE_BINARY_DIR}/libchdr.pc DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") +endif() + diff --git a/cores/saturn/deps/libchdr/LICENSE.txt b/cores/saturn/deps/libchdr/LICENSE.txt new file mode 100644 index 000000000..1c36e5b59 --- /dev/null +++ b/cores/saturn/deps/libchdr/LICENSE.txt @@ -0,0 +1,24 @@ +Copyright Romain Tisserand +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/cores/saturn/deps/libchdr/README.md b/cores/saturn/deps/libchdr/README.md new file mode 100644 index 000000000..940920a53 --- /dev/null +++ b/cores/saturn/deps/libchdr/README.md @@ -0,0 +1,7 @@ +# libchdr + +libchdr is a standalone library for reading MAME's CHDv1-v5 formats. + +The code is based off of MAME's old C codebase which read up to CHDv4 with OS-dependent features removed, and CHDv5 support backported from MAME's current C++ codebase. + +libchdr is licensed under the BSD 3-Clause (see [LICENSE.txt](LICENSE.txt)) and uses third party libraries that are each distributed under their own terms (see each library's license in [deps/](deps/)). diff --git a/cores/saturn/deps/libchdr/include/dr_libs/dr_flac.h b/cores/saturn/deps/libchdr/include/dr_libs/dr_flac.h new file mode 100644 index 000000000..6cf72614e --- /dev/null +++ b/cores/saturn/deps/libchdr/include/dr_libs/dr_flac.h @@ -0,0 +1,10739 @@ +/* +FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file. +dr_flac - v0.12.28 - 2021-02-21 + +David Reid - mackron@gmail.com + +GitHub: https://github.com/mackron/dr_libs +*/ + +/* +RELEASE NOTES - v0.12.0 +======================= +Version 0.12.0 has breaking API changes including changes to the existing API and the removal of deprecated APIs. + + +Improved Client-Defined Memory Allocation +----------------------------------------- +The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The +existing system of DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE are still in place and will be used by default when no custom +allocation callbacks are specified. + +To use the new system, you pass in a pointer to a drflac_allocation_callbacks object to drflac_open() and family, like this: + + void* my_malloc(size_t sz, void* pUserData) + { + return malloc(sz); + } + void* my_realloc(void* p, size_t sz, void* pUserData) + { + return realloc(p, sz); + } + void my_free(void* p, void* pUserData) + { + free(p); + } + + ... + + drflac_allocation_callbacks allocationCallbacks; + allocationCallbacks.pUserData = &myData; + allocationCallbacks.onMalloc = my_malloc; + allocationCallbacks.onRealloc = my_realloc; + allocationCallbacks.onFree = my_free; + drflac* pFlac = drflac_open_file("my_file.flac", &allocationCallbacks); + +The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines. + +Passing in null for the allocation callbacks object will cause dr_flac to use defaults which is the same as DRFLAC_MALLOC, +DRFLAC_REALLOC and DRFLAC_FREE and the equivalent of how it worked in previous versions. + +Every API that opens a drflac object now takes this extra parameter. These include the following: + + drflac_open() + drflac_open_relaxed() + drflac_open_with_metadata() + drflac_open_with_metadata_relaxed() + drflac_open_file() + drflac_open_file_with_metadata() + drflac_open_memory() + drflac_open_memory_with_metadata() + drflac_open_and_read_pcm_frames_s32() + drflac_open_and_read_pcm_frames_s16() + drflac_open_and_read_pcm_frames_f32() + drflac_open_file_and_read_pcm_frames_s32() + drflac_open_file_and_read_pcm_frames_s16() + drflac_open_file_and_read_pcm_frames_f32() + drflac_open_memory_and_read_pcm_frames_s32() + drflac_open_memory_and_read_pcm_frames_s16() + drflac_open_memory_and_read_pcm_frames_f32() + + + +Optimizations +------------- +Seeking performance has been greatly improved. A new binary search based seeking algorithm has been introduced which significantly +improves performance over the brute force method which was used when no seek table was present. Seek table based seeking also takes +advantage of the new binary search seeking system to further improve performance there as well. Note that this depends on CRC which +means it will be disabled when DR_FLAC_NO_CRC is used. + +The SSE4.1 pipeline has been cleaned up and optimized. You should see some improvements with decoding speed of 24-bit files in +particular. 16-bit streams should also see some improvement. + +drflac_read_pcm_frames_s16() has been optimized. Previously this sat on top of drflac_read_pcm_frames_s32() and performed it's s32 +to s16 conversion in a second pass. This is now all done in a single pass. This includes SSE2 and ARM NEON optimized paths. + +A minor optimization has been implemented for drflac_read_pcm_frames_s32(). This will now use an SSE2 optimized pipeline for stereo +channel reconstruction which is the last part of the decoding process. + +The ARM build has seen a few improvements. The CLZ (count leading zeroes) and REV (byte swap) instructions are now used when +compiling with GCC and Clang which is achieved using inline assembly. The CLZ instruction requires ARM architecture version 5 at +compile time and the REV instruction requires ARM architecture version 6. + +An ARM NEON optimized pipeline has been implemented. To enable this you'll need to add -mfpu=neon to the command line when compiling. + + +Removed APIs +------------ +The following APIs were deprecated in version 0.11.0 and have been completely removed in version 0.12.0: + + drflac_read_s32() -> drflac_read_pcm_frames_s32() + drflac_read_s16() -> drflac_read_pcm_frames_s16() + drflac_read_f32() -> drflac_read_pcm_frames_f32() + drflac_seek_to_sample() -> drflac_seek_to_pcm_frame() + drflac_open_and_decode_s32() -> drflac_open_and_read_pcm_frames_s32() + drflac_open_and_decode_s16() -> drflac_open_and_read_pcm_frames_s16() + drflac_open_and_decode_f32() -> drflac_open_and_read_pcm_frames_f32() + drflac_open_and_decode_file_s32() -> drflac_open_file_and_read_pcm_frames_s32() + drflac_open_and_decode_file_s16() -> drflac_open_file_and_read_pcm_frames_s16() + drflac_open_and_decode_file_f32() -> drflac_open_file_and_read_pcm_frames_f32() + drflac_open_and_decode_memory_s32() -> drflac_open_memory_and_read_pcm_frames_s32() + drflac_open_and_decode_memory_s16() -> drflac_open_memory_and_read_pcm_frames_s16() + drflac_open_and_decode_memory_f32() -> drflac_open_memroy_and_read_pcm_frames_f32() + +Prior versions of dr_flac operated on a per-sample basis whereas now it operates on PCM frames. The removed APIs all relate +to the old per-sample APIs. You now need to use the "pcm_frame" versions. +*/ + + +/* +Introduction +============ +dr_flac is a single file library. To use it, do something like the following in one .c file. + + ```c + #define DR_FLAC_IMPLEMENTATION + #include "dr_flac.h" + ``` + +You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, do something like the following: + + ```c + drflac* pFlac = drflac_open_file("MySong.flac", NULL); + if (pFlac == NULL) { + // Failed to open FLAC file + } + + drflac_int32* pSamples = malloc(pFlac->totalPCMFrameCount * pFlac->channels * sizeof(drflac_int32)); + drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalPCMFrameCount, pSamples); + ``` + +The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of channels and the bits per sample, +should be directly accessible - just make sure you don't change their values. Samples are always output as interleaved signed 32-bit PCM. In the example above +a native FLAC stream was opened, however dr_flac has seamless support for Ogg encapsulated FLAC streams as well. + +You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and the decoder will give you as many +samples as it can, up to the amount requested. Later on when you need the next batch of samples, just call it again. Example: + + ```c + while (drflac_read_pcm_frames_s32(pFlac, chunkSizeInPCMFrames, pChunkSamples) > 0) { + do_something(); + } + ``` + +You can seek to a specific PCM frame with `drflac_seek_to_pcm_frame()`. + +If you just want to quickly decode an entire FLAC file in one go you can do something like this: + + ```c + unsigned int channels; + unsigned int sampleRate; + drflac_uint64 totalPCMFrameCount; + drflac_int32* pSampleData = drflac_open_file_and_read_pcm_frames_s32("MySong.flac", &channels, &sampleRate, &totalPCMFrameCount, NULL); + if (pSampleData == NULL) { + // Failed to open and decode FLAC file. + } + + ... + + drflac_free(pSampleData, NULL); + ``` + +You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs respectively, but note that these +should be considered lossy. + + +If you need access to metadata (album art, etc.), use `drflac_open_with_metadata()`, `drflac_open_file_with_metdata()` or `drflac_open_memory_with_metadata()`. +The rationale for keeping these APIs separate is that they're slightly slower than the normal versions and also just a little bit harder to use. dr_flac +reports metadata to the application through the use of a callback, and every metadata block is reported before `drflac_open_with_metdata()` returns. + +The main opening APIs (`drflac_open()`, etc.) will fail if the header is not present. The presents a problem in certain scenarios such as broadcast style +streams or internet radio where the header may not be present because the user has started playback mid-stream. To handle this, use the relaxed APIs: + + `drflac_open_relaxed()` + `drflac_open_with_metadata_relaxed()` + +It is not recommended to use these APIs for file based streams because a missing header would usually indicate a corrupt or perverse file. In addition, these +APIs can take a long time to initialize because they may need to spend a lot of time finding the first frame. + + + +Build Options +============= +#define these options before including this file. + +#define DR_FLAC_NO_OGG + Disables support for Ogg/FLAC streams. + +#define DR_FLAC_BUFFER_SIZE + Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls back to the client for more data. + Larger values means more memory, but better performance. My tests show diminishing returns after about 4KB (which is the default). Consider reducing this if + you have a very efficient implementation of onRead(), or increase it if it's very inefficient. Must be a multiple of 8. + +#define DR_FLAC_NO_CRC + Disables CRC checks. This will offer a performance boost when CRC is unnecessary. This will disable binary search seeking. When seeking, the seek table will + be used if available. Otherwise the seek will be performed using brute force. + +#define DR_FLAC_NO_SIMD + Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having compatibility issues with your compiler. + + + +Notes +===== +- dr_flac does not support changing the sample rate nor channel count mid stream. +- dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization. +- When using Ogg encapsulation, a corrupted metadata block will result in `drflac_open_with_metadata()` and `drflac_open()` returning inconsistent samples due + to differences in corrupted stream recorvery logic between the two APIs. +*/ + +#ifndef dr_flac_h +#define dr_flac_h + +#ifdef __cplusplus +extern "C" { +#endif + +#define DRFLAC_STRINGIFY(x) #x +#define DRFLAC_XSTRINGIFY(x) DRFLAC_STRINGIFY(x) + +#define DRFLAC_VERSION_MAJOR 0 +#define DRFLAC_VERSION_MINOR 12 +#define DRFLAC_VERSION_REVISION 28 +#define DRFLAC_VERSION_STRING DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION) + +#include /* For size_t. */ + +/* Sized types. */ +typedef signed char drflac_int8; +typedef unsigned char drflac_uint8; +typedef signed short drflac_int16; +typedef unsigned short drflac_uint16; +typedef signed int drflac_int32; +typedef unsigned int drflac_uint32; +#if defined(_MSC_VER) + typedef signed __int64 drflac_int64; + typedef unsigned __int64 drflac_uint64; +#else + #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wlong-long" + #if defined(__clang__) + #pragma GCC diagnostic ignored "-Wc++11-long-long" + #endif + #endif + typedef signed long long drflac_int64; + typedef unsigned long long drflac_uint64; + #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) + #pragma GCC diagnostic pop + #endif +#endif +#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__) + typedef drflac_uint64 drflac_uintptr; +#else + typedef drflac_uint32 drflac_uintptr; +#endif +typedef drflac_uint8 drflac_bool8; +typedef drflac_uint32 drflac_bool32; +#define DRFLAC_TRUE 1 +#define DRFLAC_FALSE 0 + +#if !defined(DRFLAC_API) + #if defined(DRFLAC_DLL) + #if defined(_WIN32) + #define DRFLAC_DLL_IMPORT __declspec(dllimport) + #define DRFLAC_DLL_EXPORT __declspec(dllexport) + #define DRFLAC_DLL_PRIVATE static + #else + #if defined(__GNUC__) && __GNUC__ >= 4 + #define DRFLAC_DLL_IMPORT __attribute__((visibility("default"))) + #define DRFLAC_DLL_EXPORT __attribute__((visibility("default"))) + #define DRFLAC_DLL_PRIVATE __attribute__((visibility("hidden"))) + #else + #define DRFLAC_DLL_IMPORT + #define DRFLAC_DLL_EXPORT + #define DRFLAC_DLL_PRIVATE static + #endif + #endif + + #if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION) + #define DRFLAC_API DRFLAC_DLL_EXPORT + #else + #define DRFLAC_API DRFLAC_DLL_IMPORT + #endif + #define DRFLAC_PRIVATE DRFLAC_DLL_PRIVATE + #else + #define DRFLAC_API extern + #define DRFLAC_PRIVATE static + #endif +#endif + +#if defined(_MSC_VER) && _MSC_VER >= 1700 /* Visual Studio 2012 */ + #define DRFLAC_DEPRECATED __declspec(deprecated) +#elif (defined(__GNUC__) && __GNUC__ >= 4) /* GCC 4 */ + #define DRFLAC_DEPRECATED __attribute__((deprecated)) +#elif defined(__has_feature) /* Clang */ + #if __has_feature(attribute_deprecated) + #define DRFLAC_DEPRECATED __attribute__((deprecated)) + #else + #define DRFLAC_DEPRECATED + #endif +#else + #define DRFLAC_DEPRECATED +#endif + +DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision); +DRFLAC_API const char* drflac_version_string(void); + +/* +As data is read from the client it is placed into an internal buffer for fast access. This controls the size of that buffer. Larger values means more speed, +but also more memory. In my testing there is diminishing returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8. +*/ +#ifndef DR_FLAC_BUFFER_SIZE +#define DR_FLAC_BUFFER_SIZE 4096 +#endif + +/* Check if we can enable 64-bit optimizations. */ +#if defined(_WIN64) || defined(_LP64) || defined(__LP64__) +#define DRFLAC_64BIT +#endif + +#ifdef DRFLAC_64BIT +typedef drflac_uint64 drflac_cache_t; +#else +typedef drflac_uint32 drflac_cache_t; +#endif + +/* The various metadata block types. */ +#define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO 0 +#define DRFLAC_METADATA_BLOCK_TYPE_PADDING 1 +#define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION 2 +#define DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE 3 +#define DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT 4 +#define DRFLAC_METADATA_BLOCK_TYPE_CUESHEET 5 +#define DRFLAC_METADATA_BLOCK_TYPE_PICTURE 6 +#define DRFLAC_METADATA_BLOCK_TYPE_INVALID 127 + +/* The various picture types specified in the PICTURE block. */ +#define DRFLAC_PICTURE_TYPE_OTHER 0 +#define DRFLAC_PICTURE_TYPE_FILE_ICON 1 +#define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON 2 +#define DRFLAC_PICTURE_TYPE_COVER_FRONT 3 +#define DRFLAC_PICTURE_TYPE_COVER_BACK 4 +#define DRFLAC_PICTURE_TYPE_LEAFLET_PAGE 5 +#define DRFLAC_PICTURE_TYPE_MEDIA 6 +#define DRFLAC_PICTURE_TYPE_LEAD_ARTIST 7 +#define DRFLAC_PICTURE_TYPE_ARTIST 8 +#define DRFLAC_PICTURE_TYPE_CONDUCTOR 9 +#define DRFLAC_PICTURE_TYPE_BAND 10 +#define DRFLAC_PICTURE_TYPE_COMPOSER 11 +#define DRFLAC_PICTURE_TYPE_LYRICIST 12 +#define DRFLAC_PICTURE_TYPE_RECORDING_LOCATION 13 +#define DRFLAC_PICTURE_TYPE_DURING_RECORDING 14 +#define DRFLAC_PICTURE_TYPE_DURING_PERFORMANCE 15 +#define DRFLAC_PICTURE_TYPE_SCREEN_CAPTURE 16 +#define DRFLAC_PICTURE_TYPE_BRIGHT_COLORED_FISH 17 +#define DRFLAC_PICTURE_TYPE_ILLUSTRATION 18 +#define DRFLAC_PICTURE_TYPE_BAND_LOGOTYPE 19 +#define DRFLAC_PICTURE_TYPE_PUBLISHER_LOGOTYPE 20 + +typedef enum +{ + drflac_container_native, + drflac_container_ogg, + drflac_container_unknown +} drflac_container; + +typedef enum +{ + drflac_seek_origin_start, + drflac_seek_origin_current +} drflac_seek_origin; + +/* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */ +#pragma pack(2) +typedef struct +{ + drflac_uint64 firstPCMFrame; + drflac_uint64 flacFrameOffset; /* The offset from the first byte of the header of the first frame. */ + drflac_uint16 pcmFrameCount; +} drflac_seekpoint; +#pragma pack() + +typedef struct +{ + drflac_uint16 minBlockSizeInPCMFrames; + drflac_uint16 maxBlockSizeInPCMFrames; + drflac_uint32 minFrameSizeInPCMFrames; + drflac_uint32 maxFrameSizeInPCMFrames; + drflac_uint32 sampleRate; + drflac_uint8 channels; + drflac_uint8 bitsPerSample; + drflac_uint64 totalPCMFrameCount; + drflac_uint8 md5[16]; +} drflac_streaminfo; + +typedef struct +{ + /* + The metadata type. Use this to know how to interpret the data below. Will be set to one of the + DRFLAC_METADATA_BLOCK_TYPE_* tokens. + */ + drflac_uint32 type; + + /* + A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to + not modify the contents of this buffer. Use the structures below for more meaningful and structured + information about the metadata. It's possible for this to be null. + */ + const void* pRawData; + + /* The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. */ + drflac_uint32 rawDataSize; + + union + { + drflac_streaminfo streaminfo; + + struct + { + int unused; + } padding; + + struct + { + drflac_uint32 id; + const void* pData; + drflac_uint32 dataSize; + } application; + + struct + { + drflac_uint32 seekpointCount; + const drflac_seekpoint* pSeekpoints; + } seektable; + + struct + { + drflac_uint32 vendorLength; + const char* vendor; + drflac_uint32 commentCount; + const void* pComments; + } vorbis_comment; + + struct + { + char catalog[128]; + drflac_uint64 leadInSampleCount; + drflac_bool32 isCD; + drflac_uint8 trackCount; + const void* pTrackData; + } cuesheet; + + struct + { + drflac_uint32 type; + drflac_uint32 mimeLength; + const char* mime; + drflac_uint32 descriptionLength; + const char* description; + drflac_uint32 width; + drflac_uint32 height; + drflac_uint32 colorDepth; + drflac_uint32 indexColorCount; + drflac_uint32 pictureDataSize; + const drflac_uint8* pPictureData; + } picture; + } data; +} drflac_metadata; + + +/* +Callback for when data needs to be read from the client. + + +Parameters +---------- +pUserData (in) + The user data that was passed to drflac_open() and family. + +pBufferOut (out) + The output buffer. + +bytesToRead (in) + The number of bytes to read. + + +Return Value +------------ +The number of bytes actually read. + + +Remarks +------- +A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until either the entire bytesToRead is filled or +you have reached the end of the stream. +*/ +typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead); + +/* +Callback for when data needs to be seeked. + + +Parameters +---------- +pUserData (in) + The user data that was passed to drflac_open() and family. + +offset (in) + The number of bytes to move, relative to the origin. Will never be negative. + +origin (in) + The origin of the seek - the current position or the start of the stream. + + +Return Value +------------ +Whether or not the seek was successful. + + +Remarks +------- +The offset will never be negative. Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be +either drflac_seek_origin_start or drflac_seek_origin_current. + +When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of the FLAC stream. This needs to be detected +and handled by returning DRFLAC_FALSE. +*/ +typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin); + +/* +Callback for when a metadata block is read. + + +Parameters +---------- +pUserData (in) + The user data that was passed to drflac_open() and family. + +pMetadata (in) + A pointer to a structure containing the data of the metadata block. + + +Remarks +------- +Use pMetadata->type to determine which metadata block is being handled and how to read the data. This +will be set to one of the DRFLAC_METADATA_BLOCK_TYPE_* tokens. +*/ +typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata); + + +typedef struct +{ + void* pUserData; + void* (* onMalloc)(size_t sz, void* pUserData); + void* (* onRealloc)(void* p, size_t sz, void* pUserData); + void (* onFree)(void* p, void* pUserData); +} drflac_allocation_callbacks; + +/* Structure for internal use. Only used for decoders opened with drflac_open_memory. */ +typedef struct +{ + const drflac_uint8* data; + size_t dataSize; + size_t currentReadPos; +} drflac__memory_stream; + +/* Structure for internal use. Used for bit streaming. */ +typedef struct +{ + /* The function to call when more data needs to be read. */ + drflac_read_proc onRead; + + /* The function to call when the current read position needs to be moved. */ + drflac_seek_proc onSeek; + + /* The user data to pass around to onRead and onSeek. */ + void* pUserData; + + + /* + The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the + stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether + or not the bistreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t). + */ + size_t unalignedByteCount; + + /* The content of the unaligned bytes. */ + drflac_cache_t unalignedCache; + + /* The index of the next valid cache line in the "L2" cache. */ + drflac_uint32 nextL2Line; + + /* The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. */ + drflac_uint32 consumedBits; + + /* + The cached data which was most recently read from the client. There are two levels of cache. Data flows as such: + Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions. + */ + drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)]; + drflac_cache_t cache; + + /* + CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this + is reset to 0 at the beginning of each frame. + */ + drflac_uint16 crc16; + drflac_cache_t crc16Cache; /* A cache for optimizing CRC calculations. This is filled when when the L1 cache is reloaded. */ + drflac_uint32 crc16CacheIgnoredBytes; /* The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. */ +} drflac_bs; + +typedef struct +{ + /* The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. */ + drflac_uint8 subframeType; + + /* The number of wasted bits per sample as specified by the sub-frame header. */ + drflac_uint8 wastedBitsPerSample; + + /* The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. */ + drflac_uint8 lpcOrder; + + /* A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. */ + drflac_int32* pSamplesS32; +} drflac_subframe; + +typedef struct +{ + /* + If the stream uses variable block sizes, this will be set to the index of the first PCM frame. If fixed block sizes are used, this will + always be set to 0. This is 64-bit because the decoded PCM frame number will be 36 bits. + */ + drflac_uint64 pcmFrameNumber; + + /* + If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. This + is 32-bit because in fixed block sizes, the maximum frame number will be 31 bits. + */ + drflac_uint32 flacFrameNumber; + + /* The sample rate of this frame. */ + drflac_uint32 sampleRate; + + /* The number of PCM frames in each sub-frame within this frame. */ + drflac_uint16 blockSizeInPCMFrames; + + /* + The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this + will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE. + */ + drflac_uint8 channelAssignment; + + /* The number of bits per sample within this frame. */ + drflac_uint8 bitsPerSample; + + /* The frame's CRC. */ + drflac_uint8 crc8; +} drflac_frame_header; + +typedef struct +{ + /* The header. */ + drflac_frame_header header; + + /* + The number of PCM frames left to be read in this FLAC frame. This is initially set to the block size. As PCM frames are read, + this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame. + */ + drflac_uint32 pcmFramesRemaining; + + /* The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. */ + drflac_subframe subframes[8]; +} drflac_frame; + +typedef struct +{ + /* The function to call when a metadata block is read. */ + drflac_meta_proc onMeta; + + /* The user data posted to the metadata callback function. */ + void* pUserDataMD; + + /* Memory allocation callbacks. */ + drflac_allocation_callbacks allocationCallbacks; + + + /* The sample rate. Will be set to something like 44100. */ + drflac_uint32 sampleRate; + + /* + The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the + value specified in the STREAMINFO block. + */ + drflac_uint8 channels; + + /* The bits per sample. Will be set to something like 16, 24, etc. */ + drflac_uint8 bitsPerSample; + + /* The maximum block size, in samples. This number represents the number of samples in each channel (not combined). */ + drflac_uint16 maxBlockSizeInPCMFrames; + + /* + The total number of PCM Frames making up the stream. Can be 0 in which case it's still a valid stream, but just means + the total PCM frame count is unknown. Likely the case with streams like internet radio. + */ + drflac_uint64 totalPCMFrameCount; + + + /* The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. */ + drflac_container container; + + /* The number of seekpoints in the seektable. */ + drflac_uint32 seekpointCount; + + + /* Information about the frame the decoder is currently sitting on. */ + drflac_frame currentFLACFrame; + + + /* The index of the PCM frame the decoder is currently sitting on. This is only used for seeking. */ + drflac_uint64 currentPCMFrame; + + /* The position of the first FLAC frame in the stream. This is only ever used for seeking. */ + drflac_uint64 firstFLACFramePosInBytes; + + + /* A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). */ + drflac__memory_stream memoryStream; + + + /* A pointer to the decoded sample data. This is an offset of pExtraData. */ + drflac_int32* pDecodedSamples; + + /* A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. */ + drflac_seekpoint* pSeekpoints; + + /* Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. */ + void* _oggbs; + + /* Internal use only. Used for profiling and testing different seeking modes. */ + drflac_bool32 _noSeekTableSeek : 1; + drflac_bool32 _noBinarySearchSeek : 1; + drflac_bool32 _noBruteForceSeek : 1; + + /* The bit streamer. The raw FLAC data is fed through this object. */ + drflac_bs bs; + + /* Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. */ + drflac_uint8 pExtraData[1]; +} drflac; + + +/* +Opens a FLAC decoder. + + +Parameters +---------- +onRead (in) + The function to call when data needs to be read from the client. + +onSeek (in) + The function to call when the read position of the client data needs to move. + +pUserData (in, optional) + A pointer to application defined data that will be passed to onRead and onSeek. + +pAllocationCallbacks (in, optional) + A pointer to application defined callbacks for managing memory allocations. + + +Return Value +------------ +Returns a pointer to an object representing the decoder. + + +Remarks +------- +Close the decoder with `drflac_close()`. + +`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`. + +This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated FLAC, both of which should work seamlessly +without any manual intervention. Ogg encapsulation also works with multiplexed streams which basically means it can play FLAC encoded audio tracks in videos. + +This is the lowest level function for opening a FLAC stream. You can also use `drflac_open_file()` and `drflac_open_memory()` to open the stream from a file or +from a block of memory respectively. + +The STREAMINFO block must be present for this to succeed. Use `drflac_open_relaxed()` to open a FLAC stream where the header may not be present. + +Use `drflac_open_with_metadata()` if you need access to metadata. + + +Seek Also +--------- +drflac_open_file() +drflac_open_memory() +drflac_open_with_metadata() +drflac_close() +*/ +DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Opens a FLAC stream with relaxed validation of the header block. + + +Parameters +---------- +onRead (in) + The function to call when data needs to be read from the client. + +onSeek (in) + The function to call when the read position of the client data needs to move. + +container (in) + Whether or not the FLAC stream is encapsulated using standard FLAC encapsulation or Ogg encapsulation. + +pUserData (in, optional) + A pointer to application defined data that will be passed to onRead and onSeek. + +pAllocationCallbacks (in, optional) + A pointer to application defined callbacks for managing memory allocations. + + +Return Value +------------ +A pointer to an object representing the decoder. + + +Remarks +------- +The same as drflac_open(), except attempts to open the stream even when a header block is not present. + +Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do not set this to `drflac_container_unknown` +as that is for internal use only. + +Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never found it will continue forever. To abort, +force your `onRead` callback to return 0, which dr_flac will use as an indicator that the end of the stream was found. + +Use `drflac_open_with_metadata_relaxed()` if you need access to metadata. +*/ +DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.). + + +Parameters +---------- +onRead (in) + The function to call when data needs to be read from the client. + +onSeek (in) + The function to call when the read position of the client data needs to move. + +onMeta (in) + The function to call for every metadata block. + +pUserData (in, optional) + A pointer to application defined data that will be passed to onRead, onSeek and onMeta. + +pAllocationCallbacks (in, optional) + A pointer to application defined callbacks for managing memory allocations. + + +Return Value +------------ +A pointer to an object representing the decoder. + + +Remarks +------- +Close the decoder with `drflac_close()`. + +`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`. + +This is slower than `drflac_open()`, so avoid this one if you don't need metadata. Internally, this will allocate and free memory on the heap for every +metadata block except for STREAMINFO and PADDING blocks. + +The caller is notified of the metadata via the `onMeta` callback. All metadata blocks will be handled before the function returns. This callback takes a +pointer to a `drflac_metadata` object which is a union containing the data of all relevant metadata blocks. Use the `type` member to discriminate against +the different metadata types. + +The STREAMINFO block must be present for this to succeed. Use `drflac_open_with_metadata_relaxed()` to open a FLAC stream where the header may not be present. + +Note that this will behave inconsistently with `drflac_open()` if the stream is an Ogg encapsulated stream and a metadata block is corrupted. This is due to +the way the Ogg stream recovers from corrupted pages. When `drflac_open_with_metadata()` is being used, the open routine will try to read the contents of the +metadata block, whereas `drflac_open()` will simply seek past it (for the sake of efficiency). This inconsistency can result in different samples being +returned depending on whether or not the stream is being opened with metadata. + + +Seek Also +--------- +drflac_open_file_with_metadata() +drflac_open_memory_with_metadata() +drflac_open() +drflac_close() +*/ +DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present. + +See Also +-------- +drflac_open_with_metadata() +drflac_open_relaxed() +*/ +DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Closes the given FLAC decoder. + + +Parameters +---------- +pFlac (in) + The decoder to close. + + +Remarks +------- +This will destroy the decoder object. + + +See Also +-------- +drflac_open() +drflac_open_with_metadata() +drflac_open_file() +drflac_open_file_w() +drflac_open_file_with_metadata() +drflac_open_file_with_metadata_w() +drflac_open_memory() +drflac_open_memory_with_metadata() +*/ +DRFLAC_API void drflac_close(drflac* pFlac); + + +/* +Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM. + + +Parameters +---------- +pFlac (in) + The decoder. + +framesToRead (in) + The number of PCM frames to read. + +pBufferOut (out, optional) + A pointer to the buffer that will receive the decoded samples. + + +Return Value +------------ +Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end. + + +Remarks +------- +pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked. +*/ +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut); + + +/* +Reads sample data from the given FLAC decoder, output as interleaved signed 16-bit PCM. + + +Parameters +---------- +pFlac (in) + The decoder. + +framesToRead (in) + The number of PCM frames to read. + +pBufferOut (out, optional) + A pointer to the buffer that will receive the decoded samples. + + +Return Value +------------ +Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end. + + +Remarks +------- +pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked. + +Note that this is lossy for streams where the bits per sample is larger than 16. +*/ +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut); + +/* +Reads sample data from the given FLAC decoder, output as interleaved 32-bit floating point PCM. + + +Parameters +---------- +pFlac (in) + The decoder. + +framesToRead (in) + The number of PCM frames to read. + +pBufferOut (out, optional) + A pointer to the buffer that will receive the decoded samples. + + +Return Value +------------ +Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end. + + +Remarks +------- +pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked. + +Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly represent every possible number. +*/ +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut); + +/* +Seeks to the PCM frame at the given index. + + +Parameters +---------- +pFlac (in) + The decoder. + +pcmFrameIndex (in) + The index of the PCM frame to seek to. See notes below. + + +Return Value +------------- +`DRFLAC_TRUE` if successful; `DRFLAC_FALSE` otherwise. +*/ +DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex); + + + +/* +Opens a FLAC decoder from a pre-allocated block of memory + + +Parameters +---------- +pData (in) + A pointer to the raw encoded FLAC data. + +dataSize (in) + The size in bytes of `data`. + +pAllocationCallbacks (in) + A pointer to application defined callbacks for managing memory allocations. + + +Return Value +------------ +A pointer to an object representing the decoder. + + +Remarks +------- +This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for the lifetime of the decoder. + + +See Also +-------- +drflac_open() +drflac_close() +*/ +DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.) + + +Parameters +---------- +pData (in) + A pointer to the raw encoded FLAC data. + +dataSize (in) + The size in bytes of `data`. + +onMeta (in) + The callback to fire for each metadata block. + +pUserData (in) + A pointer to the user data to pass to the metadata callback. + +pAllocationCallbacks (in) + A pointer to application defined callbacks for managing memory allocations. + + +Remarks +------- +Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled. + + +See Also +------- +drflac_open_with_metadata() +drflac_open() +drflac_close() +*/ +DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks); + + + +/* High Level APIs */ + +/* +Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a +pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free(). + +You can pass in custom memory allocation callbacks via the pAllocationCallbacks parameter. This can be NULL in which +case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. + +Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously +read samples into a dynamically sized buffer on the heap until no samples are left. + +Do not call this function on a broadcast type of stream (like internet radio streams and whatnot). +*/ +DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */ +DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */ +DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. */ +DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */ +DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */ +DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks); + +/* +Frees memory that was allocated internally by dr_flac. + +Set pAllocationCallbacks to the same object that was passed to drflac_open_*_and_read_pcm_frames_*(). If you originally passed in NULL, pass in NULL for this. +*/ +DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks); + + +/* Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. */ +typedef struct +{ + drflac_uint32 countRemaining; + const char* pRunningData; +} drflac_vorbis_comment_iterator; + +/* +Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT +metadata block. +*/ +DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments); + +/* +Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The +returned string is NOT null terminated. +*/ +DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut); + + +/* Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. */ +typedef struct +{ + drflac_uint32 countRemaining; + const char* pRunningData; +} drflac_cuesheet_track_iterator; + +/* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */ +#pragma pack(4) +typedef struct +{ + drflac_uint64 offset; + drflac_uint8 index; + drflac_uint8 reserved[3]; +} drflac_cuesheet_track_index; +#pragma pack() + +typedef struct +{ + drflac_uint64 offset; + drflac_uint8 trackNumber; + char ISRC[12]; + drflac_bool8 isAudio; + drflac_bool8 preEmphasis; + drflac_uint8 indexCount; + const drflac_cuesheet_track_index* pIndexPoints; +} drflac_cuesheet_track; + +/* +Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata +block. +*/ +DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData); + +/* Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more comments. */ +DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack); + + +#ifdef __cplusplus +} +#endif +#endif /* dr_flac_h */ + + +/************************************************************************************************************************************************************ + ************************************************************************************************************************************************************ + + IMPLEMENTATION + + ************************************************************************************************************************************************************ + ************************************************************************************************************************************************************/ +#if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION) +#ifndef dr_flac_c +#define dr_flac_c + +/* Disable some annoying warnings. */ +#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) + #pragma GCC diagnostic push + #if __GNUC__ >= 7 + #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" + #endif +#endif + +#ifdef __linux__ + #ifndef _BSD_SOURCE + #define _BSD_SOURCE + #endif + #ifndef _DEFAULT_SOURCE + #define _DEFAULT_SOURCE + #endif + #ifndef __USE_BSD + #define __USE_BSD + #endif + #include +#endif + +#include +#include + +#ifdef _MSC_VER + #define DRFLAC_INLINE __forceinline +#elif defined(__GNUC__) + /* + I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when + the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some + case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the + command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue + I am using "__inline__" only when we're compiling in strict ANSI mode. + */ + #if defined(__STRICT_ANSI__) + #define DRFLAC_INLINE __inline__ __attribute__((always_inline)) + #else + #define DRFLAC_INLINE inline __attribute__((always_inline)) + #endif +#elif defined(__WATCOMC__) + #define DRFLAC_INLINE __inline +#else + #define DRFLAC_INLINE +#endif + +/* CPU architecture. */ +#if defined(__x86_64__) || defined(_M_X64) + #define DRFLAC_X64 +#elif defined(__i386) || defined(_M_IX86) + #define DRFLAC_X86 +#elif defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64) + #define DRFLAC_ARM +#endif + +/* +Intrinsics Support + +There's a bug in GCC 4.2.x which results in an incorrect compilation error when using _mm_slli_epi32() where it complains with + + "error: shift must be an immediate" + +Unfortuantely dr_flac depends on this for a few things so we're just going to disable SSE on GCC 4.2 and below. +*/ +#if !defined(DR_FLAC_NO_SIMD) + #if defined(DRFLAC_X64) || defined(DRFLAC_X86) + #if defined(_MSC_VER) && !defined(__clang__) + /* MSVC. */ + #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2) /* 2005 */ + #define DRFLAC_SUPPORT_SSE2 + #endif + #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41) /* 2010 */ + #define DRFLAC_SUPPORT_SSE41 + #endif + #elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) + /* Assume GNUC-style. */ + #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2) + #define DRFLAC_SUPPORT_SSE2 + #endif + #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41) + #define DRFLAC_SUPPORT_SSE41 + #endif + #endif + + /* If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. */ + #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include) + #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include() + #define DRFLAC_SUPPORT_SSE2 + #endif + #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include() + #define DRFLAC_SUPPORT_SSE41 + #endif + #endif + + #if defined(DRFLAC_SUPPORT_SSE41) + #include + #elif defined(DRFLAC_SUPPORT_SSE2) + #include + #endif + #endif + + #if defined(DRFLAC_ARM) + #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)) + #define DRFLAC_SUPPORT_NEON + #endif + + /* Fall back to looking for the #include file. */ + #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include) + #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include() + #define DRFLAC_SUPPORT_NEON + #endif + #endif + + #if defined(DRFLAC_SUPPORT_NEON) + #include + #endif + #endif +#endif + +/* Compile-time CPU feature support. */ +#if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) + #if defined(_MSC_VER) && !defined(__clang__) + #if _MSC_VER >= 1400 + #include + static void drflac__cpuid(int info[4], int fid) + { + __cpuid(info, fid); + } + #else + #define DRFLAC_NO_CPUID + #endif + #else + #if defined(__GNUC__) || defined(__clang__) + static void drflac__cpuid(int info[4], int fid) + { + /* + It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the + specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for + supporting different assembly dialects. + + What's basically happening is that we're saving and restoring the ebx register manually. + */ + #if defined(DRFLAC_X86) && defined(__PIC__) + __asm__ __volatile__ ( + "xchg{l} {%%}ebx, %k1;" + "cpuid;" + "xchg{l} {%%}ebx, %k1;" + : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0) + ); + #else + __asm__ __volatile__ ( + "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0) + ); + #endif + } + #else + #define DRFLAC_NO_CPUID + #endif + #endif +#else + #define DRFLAC_NO_CPUID +#endif + +static DRFLAC_INLINE drflac_bool32 drflac_has_sse2(void) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2) + #if defined(DRFLAC_X64) + return DRFLAC_TRUE; /* 64-bit targets always support SSE2. */ + #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__) + return DRFLAC_TRUE; /* If the compiler is allowed to freely generate SSE2 code we can assume support. */ + #else + #if defined(DRFLAC_NO_CPUID) + return DRFLAC_FALSE; + #else + int info[4]; + drflac__cpuid(info, 1); + return (info[3] & (1 << 26)) != 0; + #endif + #endif + #else + return DRFLAC_FALSE; /* SSE2 is only supported on x86 and x64 architectures. */ + #endif +#else + return DRFLAC_FALSE; /* No compiler support. */ +#endif +} + +static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void) +{ +#if defined(DRFLAC_SUPPORT_SSE41) + #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41) + #if defined(DRFLAC_X64) + return DRFLAC_TRUE; /* 64-bit targets always support SSE4.1. */ + #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__) + return DRFLAC_TRUE; /* If the compiler is allowed to freely generate SSE41 code we can assume support. */ + #else + #if defined(DRFLAC_NO_CPUID) + return DRFLAC_FALSE; + #else + int info[4]; + drflac__cpuid(info, 1); + return (info[2] & (1 << 19)) != 0; + #endif + #endif + #else + return DRFLAC_FALSE; /* SSE41 is only supported on x86 and x64 architectures. */ + #endif +#else + return DRFLAC_FALSE; /* No compiler support. */ +#endif +} + + +#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) && !defined(__clang__) + #define DRFLAC_HAS_LZCNT_INTRINSIC +#elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) + #define DRFLAC_HAS_LZCNT_INTRINSIC +#elif defined(__clang__) + #if defined(__has_builtin) + #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl) + #define DRFLAC_HAS_LZCNT_INTRINSIC + #endif + #endif +#endif + +#if defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(__clang__) + #define DRFLAC_HAS_BYTESWAP16_INTRINSIC + #define DRFLAC_HAS_BYTESWAP32_INTRINSIC + #define DRFLAC_HAS_BYTESWAP64_INTRINSIC +#elif defined(__clang__) + #if defined(__has_builtin) + #if __has_builtin(__builtin_bswap16) + #define DRFLAC_HAS_BYTESWAP16_INTRINSIC + #endif + #if __has_builtin(__builtin_bswap32) + #define DRFLAC_HAS_BYTESWAP32_INTRINSIC + #endif + #if __has_builtin(__builtin_bswap64) + #define DRFLAC_HAS_BYTESWAP64_INTRINSIC + #endif + #endif +#elif defined(__GNUC__) + #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define DRFLAC_HAS_BYTESWAP32_INTRINSIC + #define DRFLAC_HAS_BYTESWAP64_INTRINSIC + #endif + #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) + #define DRFLAC_HAS_BYTESWAP16_INTRINSIC + #endif +#endif + + +/* Standard library stuff. */ +#ifndef DRFLAC_ASSERT +#include +#define DRFLAC_ASSERT(expression) assert(expression) +#endif +#ifndef DRFLAC_MALLOC +#define DRFLAC_MALLOC(sz) malloc((sz)) +#endif +#ifndef DRFLAC_REALLOC +#define DRFLAC_REALLOC(p, sz) realloc((p), (sz)) +#endif +#ifndef DRFLAC_FREE +#define DRFLAC_FREE(p) free((p)) +#endif +#ifndef DRFLAC_COPY_MEMORY +#define DRFLAC_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz)) +#endif +#ifndef DRFLAC_ZERO_MEMORY +#define DRFLAC_ZERO_MEMORY(p, sz) memset((p), 0, (sz)) +#endif +#ifndef DRFLAC_ZERO_OBJECT +#define DRFLAC_ZERO_OBJECT(p) DRFLAC_ZERO_MEMORY((p), sizeof(*(p))) +#endif + +#define DRFLAC_MAX_SIMD_VECTOR_SIZE 64 /* 64 for AVX-512 in the future. */ + +typedef drflac_int32 drflac_result; +#define DRFLAC_SUCCESS 0 +#define DRFLAC_ERROR -1 /* A generic error. */ +#define DRFLAC_INVALID_ARGS -2 +#define DRFLAC_INVALID_OPERATION -3 +#define DRFLAC_OUT_OF_MEMORY -4 +#define DRFLAC_OUT_OF_RANGE -5 +#define DRFLAC_ACCESS_DENIED -6 +#define DRFLAC_DOES_NOT_EXIST -7 +#define DRFLAC_ALREADY_EXISTS -8 +#define DRFLAC_TOO_MANY_OPEN_FILES -9 +#define DRFLAC_INVALID_FILE -10 +#define DRFLAC_TOO_BIG -11 +#define DRFLAC_PATH_TOO_LONG -12 +#define DRFLAC_NAME_TOO_LONG -13 +#define DRFLAC_NOT_DIRECTORY -14 +#define DRFLAC_IS_DIRECTORY -15 +#define DRFLAC_DIRECTORY_NOT_EMPTY -16 +#define DRFLAC_END_OF_FILE -17 +#define DRFLAC_NO_SPACE -18 +#define DRFLAC_BUSY -19 +#define DRFLAC_IO_ERROR -20 +#define DRFLAC_INTERRUPT -21 +#define DRFLAC_UNAVAILABLE -22 +#define DRFLAC_ALREADY_IN_USE -23 +#define DRFLAC_BAD_ADDRESS -24 +#define DRFLAC_BAD_SEEK -25 +#define DRFLAC_BAD_PIPE -26 +#define DRFLAC_DEADLOCK -27 +#define DRFLAC_TOO_MANY_LINKS -28 +#define DRFLAC_NOT_IMPLEMENTED -29 +#define DRFLAC_NO_MESSAGE -30 +#define DRFLAC_BAD_MESSAGE -31 +#define DRFLAC_NO_DATA_AVAILABLE -32 +#define DRFLAC_INVALID_DATA -33 +#define DRFLAC_TIMEOUT -34 +#define DRFLAC_NO_NETWORK -35 +#define DRFLAC_NOT_UNIQUE -36 +#define DRFLAC_NOT_SOCKET -37 +#define DRFLAC_NO_ADDRESS -38 +#define DRFLAC_BAD_PROTOCOL -39 +#define DRFLAC_PROTOCOL_UNAVAILABLE -40 +#define DRFLAC_PROTOCOL_NOT_SUPPORTED -41 +#define DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED -42 +#define DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED -43 +#define DRFLAC_SOCKET_NOT_SUPPORTED -44 +#define DRFLAC_CONNECTION_RESET -45 +#define DRFLAC_ALREADY_CONNECTED -46 +#define DRFLAC_NOT_CONNECTED -47 +#define DRFLAC_CONNECTION_REFUSED -48 +#define DRFLAC_NO_HOST -49 +#define DRFLAC_IN_PROGRESS -50 +#define DRFLAC_CANCELLED -51 +#define DRFLAC_MEMORY_ALREADY_MAPPED -52 +#define DRFLAC_AT_END -53 +#define DRFLAC_CRC_MISMATCH -128 + +#define DRFLAC_SUBFRAME_CONSTANT 0 +#define DRFLAC_SUBFRAME_VERBATIM 1 +#define DRFLAC_SUBFRAME_FIXED 8 +#define DRFLAC_SUBFRAME_LPC 32 +#define DRFLAC_SUBFRAME_RESERVED 255 + +#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE 0 +#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1 + +#define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT 0 +#define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE 8 +#define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE 9 +#define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE 10 + +#define drflac_align(x, a) ((((x) + (a) - 1) / (a)) * (a)) + + +DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision) +{ + if (pMajor) { + *pMajor = DRFLAC_VERSION_MAJOR; + } + + if (pMinor) { + *pMinor = DRFLAC_VERSION_MINOR; + } + + if (pRevision) { + *pRevision = DRFLAC_VERSION_REVISION; + } +} + +DRFLAC_API const char* drflac_version_string(void) +{ + return DRFLAC_VERSION_STRING; +} + + +/* CPU caps. */ +#if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread"))) + #else + #define DRFLAC_NO_THREAD_SANITIZE + #endif +#else + #define DRFLAC_NO_THREAD_SANITIZE +#endif + +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) +static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE; +#endif + +#ifndef DRFLAC_NO_CPUID +static drflac_bool32 drflac__gIsSSE2Supported = DRFLAC_FALSE; +static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE; + +/* +I've had a bug report that Clang's ThreadSanitizer presents a warning in this function. Having reviewed this, this does +actually make sense. However, since CPU caps should never differ for a running process, I don't think the trade off of +complicating internal API's by passing around CPU caps versus just disabling the warnings is worthwhile. I'm therefore +just going to disable these warnings. This is disabled via the DRFLAC_NO_THREAD_SANITIZE attribute. +*/ +DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void) +{ + static drflac_bool32 isCPUCapsInitialized = DRFLAC_FALSE; + + if (!isCPUCapsInitialized) { + /* LZCNT */ +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) + int info[4] = {0}; + drflac__cpuid(info, 0x80000001); + drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0; +#endif + + /* SSE2 */ + drflac__gIsSSE2Supported = drflac_has_sse2(); + + /* SSE4.1 */ + drflac__gIsSSE41Supported = drflac_has_sse41(); + + /* Initialized. */ + isCPUCapsInitialized = DRFLAC_TRUE; + } +} +#else +static drflac_bool32 drflac__gIsNEONSupported = DRFLAC_FALSE; + +static DRFLAC_INLINE drflac_bool32 drflac__has_neon(void) +{ +#if defined(DRFLAC_SUPPORT_NEON) + #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON) + #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)) + return DRFLAC_TRUE; /* If the compiler is allowed to freely generate NEON code we can assume support. */ + #else + /* TODO: Runtime check. */ + return DRFLAC_FALSE; + #endif + #else + return DRFLAC_FALSE; /* NEON is only supported on ARM architectures. */ + #endif +#else + return DRFLAC_FALSE; /* No compiler support. */ +#endif +} + +DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void) +{ + drflac__gIsNEONSupported = drflac__has_neon(); + +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) + drflac__gIsLZCNTSupported = DRFLAC_TRUE; +#endif +} +#endif + + +/* Endian Management */ +static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian(void) +{ +#if defined(DRFLAC_X86) || defined(DRFLAC_X64) + return DRFLAC_TRUE; +#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN + return DRFLAC_TRUE; +#else + int n = 1; + return (*(char*)&n) == 1; +#endif +} + +static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n) +{ +#ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC + #if defined(_MSC_VER) && !defined(__clang__) + return _byteswap_ushort(n); + #elif defined(__GNUC__) || defined(__clang__) + return __builtin_bswap16(n); + #else + #error "This compiler does not support the byte swap intrinsic." + #endif +#else + return ((n & 0xFF00) >> 8) | + ((n & 0x00FF) << 8); +#endif +} + +static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n) +{ +#ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC + #if defined(_MSC_VER) && !defined(__clang__) + return _byteswap_ulong(n); + #elif defined(__GNUC__) || defined(__clang__) + #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT) /* <-- 64-bit inline assembly has not been tested, so disabling for now. */ + /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */ + drflac_uint32 r; + __asm__ __volatile__ ( + #if defined(DRFLAC_64BIT) + "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n) /* <-- This is untested. If someone in the community could test this, that would be appreciated! */ + #else + "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n) + #endif + ); + return r; + #else + return __builtin_bswap32(n); + #endif + #else + #error "This compiler does not support the byte swap intrinsic." + #endif +#else + return ((n & 0xFF000000) >> 24) | + ((n & 0x00FF0000) >> 8) | + ((n & 0x0000FF00) << 8) | + ((n & 0x000000FF) << 24); +#endif +} + +static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n) +{ +#ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC + #if defined(_MSC_VER) && !defined(__clang__) + return _byteswap_uint64(n); + #elif defined(__GNUC__) || defined(__clang__) + return __builtin_bswap64(n); + #else + #error "This compiler does not support the byte swap intrinsic." + #endif +#else + /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */ + return ((n & ((drflac_uint64)0xFF000000 << 32)) >> 56) | + ((n & ((drflac_uint64)0x00FF0000 << 32)) >> 40) | + ((n & ((drflac_uint64)0x0000FF00 << 32)) >> 24) | + ((n & ((drflac_uint64)0x000000FF << 32)) >> 8) | + ((n & ((drflac_uint64)0xFF000000 )) << 8) | + ((n & ((drflac_uint64)0x00FF0000 )) << 24) | + ((n & ((drflac_uint64)0x0000FF00 )) << 40) | + ((n & ((drflac_uint64)0x000000FF )) << 56); +#endif +} + + +static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n) +{ + if (drflac__is_little_endian()) { + return drflac__swap_endian_uint16(n); + } + + return n; +} + +static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n) +{ + if (drflac__is_little_endian()) { + return drflac__swap_endian_uint32(n); + } + + return n; +} + +static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n) +{ + if (drflac__is_little_endian()) { + return drflac__swap_endian_uint64(n); + } + + return n; +} + + +static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n) +{ + if (!drflac__is_little_endian()) { + return drflac__swap_endian_uint32(n); + } + + return n; +} + + +static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n) +{ + drflac_uint32 result = 0; + result |= (n & 0x7F000000) >> 3; + result |= (n & 0x007F0000) >> 2; + result |= (n & 0x00007F00) >> 1; + result |= (n & 0x0000007F) >> 0; + + return result; +} + + + +/* The CRC code below is based on this document: http://zlib.net/crc_v3.txt */ +static drflac_uint8 drflac__crc8_table[] = { + 0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D, + 0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D, + 0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD, + 0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD, + 0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA, + 0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A, + 0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A, + 0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A, + 0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4, + 0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4, + 0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44, + 0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34, + 0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63, + 0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13, + 0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83, + 0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3 +}; + +static drflac_uint16 drflac__crc16_table[] = { + 0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011, + 0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022, + 0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072, + 0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041, + 0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2, + 0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1, + 0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1, + 0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 0x0082, + 0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192, + 0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1, + 0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1, + 0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2, + 0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151, + 0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162, + 0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132, + 0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101, + 0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312, + 0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321, + 0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371, + 0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342, + 0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1, + 0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2, + 0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2, + 0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381, + 0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291, + 0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2, + 0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2, + 0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1, + 0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252, + 0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261, + 0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231, + 0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202 +}; + +static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint8 data) +{ + return drflac__crc8_table[crc ^ data]; +} + +static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count) +{ +#ifdef DR_FLAC_NO_CRC + (void)crc; + (void)data; + (void)count; + return 0; +#else + drflac_uint32 wholeBytes; + drflac_uint32 leftoverBits; + drflac_uint64 leftoverDataMask; + + static drflac_uint64 leftoverDataMaskTable[8] = { + 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F + }; + + DRFLAC_ASSERT(count <= 32); + + wholeBytes = count >> 3; + leftoverBits = count - (wholeBytes*8); + leftoverDataMask = leftoverDataMaskTable[leftoverBits]; + + switch (wholeBytes) { + case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits))); + case 3: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits))); + case 2: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits))); + case 1: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits))); + case 0: if (leftoverBits > 0) crc = (drflac_uint8)((crc << leftoverBits) ^ drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)]); + } + return crc; +#endif +} + +static DRFLAC_INLINE drflac_uint16 drflac_crc16_byte(drflac_uint16 crc, drflac_uint8 data) +{ + return (crc << 8) ^ drflac__crc16_table[(drflac_uint8)(crc >> 8) ^ data]; +} + +static DRFLAC_INLINE drflac_uint16 drflac_crc16_cache(drflac_uint16 crc, drflac_cache_t data) +{ +#ifdef DRFLAC_64BIT + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF)); +#endif + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 8) & 0xFF)); + crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 0) & 0xFF)); + + return crc; +} + +static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 byteCount) +{ + switch (byteCount) + { +#ifdef DRFLAC_64BIT + case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF)); + case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF)); + case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF)); + case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF)); +#endif + case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF)); + case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF)); + case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 8) & 0xFF)); + case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 0) & 0xFF)); + } + + return crc; +} + +#ifdef DRFLAC_64BIT +#define drflac__be2host__cache_line drflac__be2host_64 +#else +#define drflac__be2host__cache_line drflac__be2host_32 +#endif + +/* +BIT READING ATTEMPT #2 + +This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting +on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache +is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an +array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data +from onRead() is read into. +*/ +#define DRFLAC_CACHE_L1_SIZE_BYTES(bs) (sizeof((bs)->cache)) +#define DRFLAC_CACHE_L1_SIZE_BITS(bs) (sizeof((bs)->cache)*8) +#define DRFLAC_CACHE_L1_BITS_REMAINING(bs) (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits) +#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount) (~((~(drflac_cache_t)0) >> (_bitCount))) +#define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount) (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount)) +#define DRFLAC_CACHE_L1_SELECT(bs, _bitCount) (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)) +#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount) (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount))) +#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1))) +#define DRFLAC_CACHE_L2_SIZE_BYTES(bs) (sizeof((bs)->cacheL2)) +#define DRFLAC_CACHE_L2_LINE_COUNT(bs) (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0])) +#define DRFLAC_CACHE_L2_LINES_REMAINING(bs) (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line) + + +#ifndef DR_FLAC_NO_CRC +static DRFLAC_INLINE void drflac__reset_crc16(drflac_bs* bs) +{ + bs->crc16 = 0; + bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3; +} + +static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs) +{ + if (bs->crc16CacheIgnoredBytes == 0) { + bs->crc16 = drflac_crc16_cache(bs->crc16, bs->crc16Cache); + } else { + bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes); + bs->crc16CacheIgnoredBytes = 0; + } +} + +static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs) +{ + /* We should never be flushing in a situation where we are not aligned on a byte boundary. */ + DRFLAC_ASSERT((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0); + + /* + The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined + by the number of bits that have been consumed. + */ + if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) { + drflac__update_crc16(bs); + } else { + /* We only accumulate the consumed bits. */ + bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes); + + /* + The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated + so we can handle that later. + */ + bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3; + } + + return bs->crc16; +} +#endif + +static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs) +{ + size_t bytesRead; + size_t alignedL1LineCount; + + /* Fast path. Try loading straight from L2. */ + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + bs->cache = bs->cacheL2[bs->nextL2Line++]; + return DRFLAC_TRUE; + } + + /* + If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's + any left. + */ + if (bs->unalignedByteCount > 0) { + return DRFLAC_FALSE; /* If we have any unaligned bytes it means there's no more aligned bytes left in the client. */ + } + + bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs)); + + bs->nextL2Line = 0; + if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) { + bs->cache = bs->cacheL2[bs->nextL2Line++]; + return DRFLAC_TRUE; + } + + + /* + If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably + means we've just reached the end of the file. We need to move the valid data down to the end of the buffer + and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to + the size of the L1 so we'll need to seek backwards by any misaligned bytes. + */ + alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs); + + /* We need to keep track of any unaligned bytes for later use. */ + bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs)); + if (bs->unalignedByteCount > 0) { + bs->unalignedCache = bs->cacheL2[alignedL1LineCount]; + } + + if (alignedL1LineCount > 0) { + size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount; + size_t i; + for (i = alignedL1LineCount; i > 0; --i) { + bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1]; + } + + bs->nextL2Line = (drflac_uint32)offset; + bs->cache = bs->cacheL2[bs->nextL2Line++]; + return DRFLAC_TRUE; + } else { + /* If we get into this branch it means we weren't able to load any L1-aligned data. */ + bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs); + return DRFLAC_FALSE; + } +} + +static drflac_bool32 drflac__reload_cache(drflac_bs* bs) +{ + size_t bytesRead; + +#ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); +#endif + + /* Fast path. Try just moving the next value in the L2 cache to the L1 cache. */ + if (drflac__reload_l1_cache_from_l2(bs)) { + bs->cache = drflac__be2host__cache_line(bs->cache); + bs->consumedBits = 0; +#ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs->cache; +#endif + return DRFLAC_TRUE; + } + + /* Slow path. */ + + /* + If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last + few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the + data from the unaligned cache. + */ + bytesRead = bs->unalignedByteCount; + if (bytesRead == 0) { + bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); /* <-- The stream has been exhausted, so marked the bits as consumed. */ + return DRFLAC_FALSE; + } + + DRFLAC_ASSERT(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs)); + bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8; + + bs->cache = drflac__be2host__cache_line(bs->unalignedCache); + bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs)); /* <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. */ + bs->unalignedByteCount = 0; /* <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. */ + +#ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs->cache >> bs->consumedBits; + bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3; +#endif + return DRFLAC_TRUE; +} + +static void drflac__reset_cache(drflac_bs* bs) +{ + bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs); /* <-- This clears the L2 cache. */ + bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); /* <-- This clears the L1 cache. */ + bs->cache = 0; + bs->unalignedByteCount = 0; /* <-- This clears the trailing unaligned bytes. */ + bs->unalignedCache = 0; + +#ifndef DR_FLAC_NO_CRC + bs->crc16Cache = 0; + bs->crc16CacheIgnoredBytes = 0; +#endif +} + + +static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, drflac_uint32* pResultOut) +{ + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResultOut != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 32); + + if (bs->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS(bs)) { + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + } + + if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + /* + If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do + a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly + more optimal solution for this. + */ +#ifdef DRFLAC_64BIT + *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount); + bs->consumedBits += bitCount; + bs->cache <<= bitCount; +#else + if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { + *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount); + bs->consumedBits += bitCount; + bs->cache <<= bitCount; + } else { + /* Cannot shift by 32-bits, so need to do it differently. */ + *pResultOut = (drflac_uint32)bs->cache; + bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs); + bs->cache = 0; + } +#endif + + return DRFLAC_TRUE; + } else { + /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */ + drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs); + drflac_uint32 bitCountLo = bitCount - bitCountHi; + drflac_uint32 resultHi; + + DRFLAC_ASSERT(bitCountHi > 0); + DRFLAC_ASSERT(bitCountHi < 32); + resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi); + + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo); + bs->consumedBits += bitCountLo; + bs->cache <<= bitCountLo; + return DRFLAC_TRUE; + } +} + +static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult) +{ + drflac_uint32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 32); + + if (!drflac__read_uint32(bs, bitCount, &result)) { + return DRFLAC_FALSE; + } + + /* Do not attempt to shift by 32 as it's undefined. */ + if (bitCount < 32) { + drflac_uint32 signbit; + signbit = ((result >> (bitCount-1)) & 0x01); + result |= (~signbit + 1) << bitCount; + } + + *pResult = (drflac_int32)result; + return DRFLAC_TRUE; +} + +#ifdef DRFLAC_64BIT +static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut) +{ + drflac_uint32 resultHi; + drflac_uint32 resultLo; + + DRFLAC_ASSERT(bitCount <= 64); + DRFLAC_ASSERT(bitCount > 32); + + if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_uint32(bs, 32, &resultLo)) { + return DRFLAC_FALSE; + } + + *pResultOut = (((drflac_uint64)resultHi) << 32) | ((drflac_uint64)resultLo); + return DRFLAC_TRUE; +} +#endif + +static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult) +{ + drflac_uint32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 16); + + if (!drflac__read_uint32(bs, bitCount, &result)) { + return DRFLAC_FALSE; + } + + *pResult = (drflac_uint16)result; + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult) +{ + drflac_uint32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 8); + + if (!drflac__read_uint32(bs, bitCount, &result)) { + return DRFLAC_FALSE; + } + + *pResult = (drflac_uint8)result; + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, drflac_int8* pResult) +{ + drflac_int32 result; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pResult != NULL); + DRFLAC_ASSERT(bitCount > 0); + DRFLAC_ASSERT(bitCount <= 8); + + if (!drflac__read_int32(bs, bitCount, &result)) { + return DRFLAC_FALSE; + } + + *pResult = (drflac_int8)result; + return DRFLAC_TRUE; +} + + +static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek) +{ + if (bitsToSeek <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) { + bs->consumedBits += (drflac_uint32)bitsToSeek; + bs->cache <<= bitsToSeek; + return DRFLAC_TRUE; + } else { + /* It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. */ + bitsToSeek -= DRFLAC_CACHE_L1_BITS_REMAINING(bs); + bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs); + bs->cache = 0; + + /* Simple case. Seek in groups of the same number as bits that fit within a cache line. */ +#ifdef DRFLAC_64BIT + while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) { + drflac_uint64 bin; + if (!drflac__read_uint64(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) { + return DRFLAC_FALSE; + } + bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs); + } +#else + while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) { + drflac_uint32 bin; + if (!drflac__read_uint32(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) { + return DRFLAC_FALSE; + } + bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs); + } +#endif + + /* Whole leftover bytes. */ + while (bitsToSeek >= 8) { + drflac_uint8 bin; + if (!drflac__read_uint8(bs, 8, &bin)) { + return DRFLAC_FALSE; + } + bitsToSeek -= 8; + } + + /* Leftover bits. */ + if (bitsToSeek > 0) { + drflac_uint8 bin; + if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) { + return DRFLAC_FALSE; + } + bitsToSeek = 0; /* <-- Necessary for the assert below. */ + } + + DRFLAC_ASSERT(bitsToSeek == 0); + return DRFLAC_TRUE; + } +} + + +/* This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16. */ +static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs) +{ + DRFLAC_ASSERT(bs != NULL); + + /* + The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first + thing to do is align to the next byte. + */ + if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) { + return DRFLAC_FALSE; + } + + for (;;) { + drflac_uint8 hi; + +#ifndef DR_FLAC_NO_CRC + drflac__reset_crc16(bs); +#endif + + if (!drflac__read_uint8(bs, 8, &hi)) { + return DRFLAC_FALSE; + } + + if (hi == 0xFF) { + drflac_uint8 lo; + if (!drflac__read_uint8(bs, 6, &lo)) { + return DRFLAC_FALSE; + } + + if (lo == 0x3E) { + return DRFLAC_TRUE; + } else { + if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) { + return DRFLAC_FALSE; + } + } + } + } + + /* Should never get here. */ + /*return DRFLAC_FALSE;*/ +} + + +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) +#define DRFLAC_IMPLEMENT_CLZ_LZCNT +#endif +#if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(__clang__) +#define DRFLAC_IMPLEMENT_CLZ_MSVC +#endif + +static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x) +{ + drflac_uint32 n; + static drflac_uint32 clz_table_4[] = { + 0, + 4, + 3, 3, + 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1 + }; + + if (x == 0) { + return sizeof(x)*8; + } + + n = clz_table_4[x >> (sizeof(x)*8 - 4)]; + if (n == 0) { +#ifdef DRFLAC_64BIT + if ((x & ((drflac_uint64)0xFFFFFFFF << 32)) == 0) { n = 32; x <<= 32; } + if ((x & ((drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; } + if ((x & ((drflac_uint64)0xFF000000 << 32)) == 0) { n += 8; x <<= 8; } + if ((x & ((drflac_uint64)0xF0000000 << 32)) == 0) { n += 4; x <<= 4; } +#else + if ((x & 0xFFFF0000) == 0) { n = 16; x <<= 16; } + if ((x & 0xFF000000) == 0) { n += 8; x <<= 8; } + if ((x & 0xF0000000) == 0) { n += 4; x <<= 4; } +#endif + n += clz_table_4[x >> (sizeof(x)*8 - 4)]; + } + + return n - 1; +} + +#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT +static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void) +{ + /* Fast compile time check for ARM. */ +#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) + return DRFLAC_TRUE; +#else + /* If the compiler itself does not support the intrinsic then we'll need to return false. */ + #ifdef DRFLAC_HAS_LZCNT_INTRINSIC + return drflac__gIsLZCNTSupported; + #else + return DRFLAC_FALSE; + #endif +#endif +} + +static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x) +{ + /* + It's critical for competitive decoding performance that this function be highly optimal. With MSVC we can use the __lzcnt64() and __lzcnt() intrinsics + to achieve good performance, however on GCC and Clang it's a little bit more annoying. The __builtin_clzl() and __builtin_clzll() intrinsics leave + it undefined as to the return value when `x` is 0. We need this to be well defined as returning 32 or 64, depending on whether or not it's a 32- or + 64-bit build. To work around this we would need to add a conditional to check for the x = 0 case, but this creates unnecessary inefficiency. To work + around this problem I have written some inline assembly to emit the LZCNT (x86) or CLZ (ARM) instruction directly which removes the need to include + the conditional. This has worked well in the past, but for some reason Clang's MSVC compatible driver, clang-cl, does not seem to be handling this + in the same way as the normal Clang driver. It seems that `clang-cl` is just outputting the wrong results sometimes, maybe due to some register + getting clobbered? + + I'm not sure if this is a bug with dr_flac's inlined assembly (most likely), a bug in `clang-cl` or just a misunderstanding on my part with inline + assembly rules for `clang-cl`. If somebody can identify an error in dr_flac's inlined assembly I'm happy to get that fixed. + + Fortunately there is an easy workaround for this. Clang implements MSVC-specific intrinsics for compatibility. It also defines _MSC_VER for extra + compatibility. We can therefore just check for _MSC_VER and use the MSVC intrinsic which, fortunately for us, Clang supports. It would still be nice + to know how to fix the inlined assembly for correctness sake, however. + */ + +#if defined(_MSC_VER) /*&& !defined(__clang__)*/ /* <-- Intentionally wanting Clang to use the MSVC __lzcnt64/__lzcnt intrinsics due to above ^. */ + #ifdef DRFLAC_64BIT + return (drflac_uint32)__lzcnt64(x); + #else + return (drflac_uint32)__lzcnt(x); + #endif +#else + #if defined(__GNUC__) || defined(__clang__) + #if defined(DRFLAC_X64) + { + drflac_uint64 r; + __asm__ __volatile__ ( + "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc" + ); + + return (drflac_uint32)r; + } + #elif defined(DRFLAC_X86) + { + drflac_uint32 r; + __asm__ __volatile__ ( + "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc" + ); + + return r; + } + #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(DRFLAC_64BIT) /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */ + { + unsigned int r; + __asm__ __volatile__ ( + #if defined(DRFLAC_64BIT) + "clz %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(x) /* <-- This is untested. If someone in the community could test this, that would be appreciated! */ + #else + "clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x) + #endif + ); + + return r; + } + #else + if (x == 0) { + return sizeof(x)*8; + } + #ifdef DRFLAC_64BIT + return (drflac_uint32)__builtin_clzll((drflac_uint64)x); + #else + return (drflac_uint32)__builtin_clzl((drflac_uint32)x); + #endif + #endif + #else + /* Unsupported compiler. */ + #error "This compiler does not support the lzcnt intrinsic." + #endif +#endif +} +#endif + +#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC +#include /* For BitScanReverse(). */ + +static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x) +{ + drflac_uint32 n; + + if (x == 0) { + return sizeof(x)*8; + } + +#ifdef DRFLAC_64BIT + _BitScanReverse64((unsigned long*)&n, x); +#else + _BitScanReverse((unsigned long*)&n, x); +#endif + return sizeof(x)*8 - n - 1; +} +#endif + +static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x) +{ +#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT + if (drflac__is_lzcnt_supported()) { + return drflac__clz_lzcnt(x); + } else +#endif + { +#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC + return drflac__clz_msvc(x); +#else + return drflac__clz_software(x); +#endif + } +} + + +static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut) +{ + drflac_uint32 zeroCounter = 0; + drflac_uint32 setBitOffsetPlus1; + + while (bs->cache == 0) { + zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs); + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + } + + setBitOffsetPlus1 = drflac__clz(bs->cache); + setBitOffsetPlus1 += 1; + + bs->consumedBits += setBitOffsetPlus1; + bs->cache <<= setBitOffsetPlus1; + + *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1; + return DRFLAC_TRUE; +} + + + +static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFromStart) +{ + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(offsetFromStart > 0); + + /* + Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which + is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit. + To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder. + */ + if (offsetFromStart > 0x7FFFFFFF) { + drflac_uint64 bytesRemaining = offsetFromStart; + if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) { + return DRFLAC_FALSE; + } + bytesRemaining -= 0x7FFFFFFF; + + while (bytesRemaining > 0x7FFFFFFF) { + if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + bytesRemaining -= 0x7FFFFFFF; + } + + if (bytesRemaining > 0) { + if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + } + } else { + if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, drflac_seek_origin_start)) { + return DRFLAC_FALSE; + } + } + + /* The cache should be reset to force a reload of fresh data from the client. */ + drflac__reset_cache(bs); + return DRFLAC_TRUE; +} + + +static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut) +{ + drflac_uint8 crc; + drflac_uint64 result; + drflac_uint8 utf8[7] = {0}; + int byteCount; + int i; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(pNumberOut != NULL); + DRFLAC_ASSERT(pCRCOut != NULL); + + crc = *pCRCOut; + + if (!drflac__read_uint8(bs, 8, utf8)) { + *pNumberOut = 0; + return DRFLAC_AT_END; + } + crc = drflac_crc8(crc, utf8[0], 8); + + if ((utf8[0] & 0x80) == 0) { + *pNumberOut = utf8[0]; + *pCRCOut = crc; + return DRFLAC_SUCCESS; + } + + /*byteCount = 1;*/ + if ((utf8[0] & 0xE0) == 0xC0) { + byteCount = 2; + } else if ((utf8[0] & 0xF0) == 0xE0) { + byteCount = 3; + } else if ((utf8[0] & 0xF8) == 0xF0) { + byteCount = 4; + } else if ((utf8[0] & 0xFC) == 0xF8) { + byteCount = 5; + } else if ((utf8[0] & 0xFE) == 0xFC) { + byteCount = 6; + } else if ((utf8[0] & 0xFF) == 0xFE) { + byteCount = 7; + } else { + *pNumberOut = 0; + return DRFLAC_CRC_MISMATCH; /* Bad UTF-8 encoding. */ + } + + /* Read extra bytes. */ + DRFLAC_ASSERT(byteCount > 1); + + result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1))); + for (i = 1; i < byteCount; ++i) { + if (!drflac__read_uint8(bs, 8, utf8 + i)) { + *pNumberOut = 0; + return DRFLAC_AT_END; + } + crc = drflac_crc8(crc, utf8[i], 8); + + result = (result << 6) | (utf8[i] & 0x3F); + } + + *pNumberOut = result; + *pCRCOut = crc; + return DRFLAC_SUCCESS; +} + + + +/* +The next two functions are responsible for calculating the prediction. + +When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's +safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16. +*/ +static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) +{ + drflac_int32 prediction = 0; + + DRFLAC_ASSERT(order <= 32); + + /* 32-bit version. */ + + /* VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. */ + switch (order) + { + case 32: prediction += coefficients[31] * pDecodedSamples[-32]; + case 31: prediction += coefficients[30] * pDecodedSamples[-31]; + case 30: prediction += coefficients[29] * pDecodedSamples[-30]; + case 29: prediction += coefficients[28] * pDecodedSamples[-29]; + case 28: prediction += coefficients[27] * pDecodedSamples[-28]; + case 27: prediction += coefficients[26] * pDecodedSamples[-27]; + case 26: prediction += coefficients[25] * pDecodedSamples[-26]; + case 25: prediction += coefficients[24] * pDecodedSamples[-25]; + case 24: prediction += coefficients[23] * pDecodedSamples[-24]; + case 23: prediction += coefficients[22] * pDecodedSamples[-23]; + case 22: prediction += coefficients[21] * pDecodedSamples[-22]; + case 21: prediction += coefficients[20] * pDecodedSamples[-21]; + case 20: prediction += coefficients[19] * pDecodedSamples[-20]; + case 19: prediction += coefficients[18] * pDecodedSamples[-19]; + case 18: prediction += coefficients[17] * pDecodedSamples[-18]; + case 17: prediction += coefficients[16] * pDecodedSamples[-17]; + case 16: prediction += coefficients[15] * pDecodedSamples[-16]; + case 15: prediction += coefficients[14] * pDecodedSamples[-15]; + case 14: prediction += coefficients[13] * pDecodedSamples[-14]; + case 13: prediction += coefficients[12] * pDecodedSamples[-13]; + case 12: prediction += coefficients[11] * pDecodedSamples[-12]; + case 11: prediction += coefficients[10] * pDecodedSamples[-11]; + case 10: prediction += coefficients[ 9] * pDecodedSamples[-10]; + case 9: prediction += coefficients[ 8] * pDecodedSamples[- 9]; + case 8: prediction += coefficients[ 7] * pDecodedSamples[- 8]; + case 7: prediction += coefficients[ 6] * pDecodedSamples[- 7]; + case 6: prediction += coefficients[ 5] * pDecodedSamples[- 6]; + case 5: prediction += coefficients[ 4] * pDecodedSamples[- 5]; + case 4: prediction += coefficients[ 3] * pDecodedSamples[- 4]; + case 3: prediction += coefficients[ 2] * pDecodedSamples[- 3]; + case 2: prediction += coefficients[ 1] * pDecodedSamples[- 2]; + case 1: prediction += coefficients[ 0] * pDecodedSamples[- 1]; + } + + return (drflac_int32)(prediction >> shift); +} + +static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) +{ + drflac_int64 prediction; + + DRFLAC_ASSERT(order <= 32); + + /* 64-bit version. */ + + /* This method is faster on the 32-bit build when compiling with VC++. See note below. */ +#ifndef DRFLAC_64BIT + if (order == 8) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8]; + } + else if (order == 7) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + } + else if (order == 3) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + } + else if (order == 6) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + } + else if (order == 5) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + } + else if (order == 4) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + } + else if (order == 12) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8]; + prediction += coefficients[8] * (drflac_int64)pDecodedSamples[-9]; + prediction += coefficients[9] * (drflac_int64)pDecodedSamples[-10]; + prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11]; + prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12]; + } + else if (order == 2) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + } + else if (order == 1) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + } + else if (order == 10) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8]; + prediction += coefficients[8] * (drflac_int64)pDecodedSamples[-9]; + prediction += coefficients[9] * (drflac_int64)pDecodedSamples[-10]; + } + else if (order == 9) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8]; + prediction += coefficients[8] * (drflac_int64)pDecodedSamples[-9]; + } + else if (order == 11) + { + prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1]; + prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2]; + prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3]; + prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4]; + prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5]; + prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6]; + prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7]; + prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8]; + prediction += coefficients[8] * (drflac_int64)pDecodedSamples[-9]; + prediction += coefficients[9] * (drflac_int64)pDecodedSamples[-10]; + prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11]; + } + else + { + int j; + + prediction = 0; + for (j = 0; j < (int)order; ++j) { + prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1]; + } + } +#endif + + /* + VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some + reason. The ugly version above is faster so we'll just switch between the two depending on the target platform. + */ +#ifdef DRFLAC_64BIT + prediction = 0; + switch (order) + { + case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32]; + case 31: prediction += coefficients[30] * (drflac_int64)pDecodedSamples[-31]; + case 30: prediction += coefficients[29] * (drflac_int64)pDecodedSamples[-30]; + case 29: prediction += coefficients[28] * (drflac_int64)pDecodedSamples[-29]; + case 28: prediction += coefficients[27] * (drflac_int64)pDecodedSamples[-28]; + case 27: prediction += coefficients[26] * (drflac_int64)pDecodedSamples[-27]; + case 26: prediction += coefficients[25] * (drflac_int64)pDecodedSamples[-26]; + case 25: prediction += coefficients[24] * (drflac_int64)pDecodedSamples[-25]; + case 24: prediction += coefficients[23] * (drflac_int64)pDecodedSamples[-24]; + case 23: prediction += coefficients[22] * (drflac_int64)pDecodedSamples[-23]; + case 22: prediction += coefficients[21] * (drflac_int64)pDecodedSamples[-22]; + case 21: prediction += coefficients[20] * (drflac_int64)pDecodedSamples[-21]; + case 20: prediction += coefficients[19] * (drflac_int64)pDecodedSamples[-20]; + case 19: prediction += coefficients[18] * (drflac_int64)pDecodedSamples[-19]; + case 18: prediction += coefficients[17] * (drflac_int64)pDecodedSamples[-18]; + case 17: prediction += coefficients[16] * (drflac_int64)pDecodedSamples[-17]; + case 16: prediction += coefficients[15] * (drflac_int64)pDecodedSamples[-16]; + case 15: prediction += coefficients[14] * (drflac_int64)pDecodedSamples[-15]; + case 14: prediction += coefficients[13] * (drflac_int64)pDecodedSamples[-14]; + case 13: prediction += coefficients[12] * (drflac_int64)pDecodedSamples[-13]; + case 12: prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12]; + case 11: prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11]; + case 10: prediction += coefficients[ 9] * (drflac_int64)pDecodedSamples[-10]; + case 9: prediction += coefficients[ 8] * (drflac_int64)pDecodedSamples[- 9]; + case 8: prediction += coefficients[ 7] * (drflac_int64)pDecodedSamples[- 8]; + case 7: prediction += coefficients[ 6] * (drflac_int64)pDecodedSamples[- 7]; + case 6: prediction += coefficients[ 5] * (drflac_int64)pDecodedSamples[- 6]; + case 5: prediction += coefficients[ 4] * (drflac_int64)pDecodedSamples[- 5]; + case 4: prediction += coefficients[ 3] * (drflac_int64)pDecodedSamples[- 4]; + case 3: prediction += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3]; + case 2: prediction += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2]; + case 1: prediction += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1]; + } +#endif + + return (drflac_int32)(prediction >> shift); +} + +static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut) +{ + drflac_uint32 riceParamPlus1 = riceParam + 1; + /*drflac_cache_t riceParamPlus1Mask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/ + drflac_uint32 riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1); + drflac_uint32 riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1; + + /* + The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have + no idea how this will work in practice... + */ + drflac_cache_t bs_cache = bs->cache; + drflac_uint32 bs_consumedBits = bs->consumedBits; + + /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */ + drflac_uint32 lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + pZeroCounterOut[0] = lzcount; + + /* + It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting + this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled + outside of this function at a higher level. + */ + extract_rice_param_part: + bs_cache <<= lzcount; + bs_consumedBits += lzcount; + + if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) { + /* Getting here means the rice parameter part is wholly contained within the current cache line. */ + pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift); + bs_cache <<= riceParamPlus1; + bs_consumedBits += riceParamPlus1; + } else { + drflac_uint32 riceParamPartHi; + drflac_uint32 riceParamPartLo; + drflac_uint32 riceParamPartLoBitCount; + + /* + Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache + line, reload the cache, and then combine it with the head of the next cache line. + */ + + /* Grab the high part of the rice parameter part. */ + riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift); + + /* Before reloading the cache we need to grab the size in bits of the low part. */ + riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits; + DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32); + + /* Now reload the cache. */ + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = riceParamPartLoBitCount; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. */ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount; + } + + /* We should now have enough information to construct the rice parameter part. */ + riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount))); + pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo; + + bs_cache <<= riceParamPartLoBitCount; + } + } else { + /* + Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call + to drflac__clz() and we need to reload the cache. + */ + drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits); + for (;;) { + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = 0; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. */ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits; + } + + lzcount = drflac__clz(bs_cache); + zeroCounter += lzcount; + + if (lzcount < sizeof(bs_cache)*8) { + break; + } + } + + pZeroCounterOut[0] = zeroCounter; + goto extract_rice_param_part; + } + + /* Make sure the cache is restored at the end of it all. */ + bs->cache = bs_cache; + bs->consumedBits = bs_consumedBits; + + return DRFLAC_TRUE; +} + +static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam) +{ + drflac_uint32 riceParamPlus1 = riceParam + 1; + drflac_uint32 riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1; + + /* + The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have + no idea how this will work in practice... + */ + drflac_cache_t bs_cache = bs->cache; + drflac_uint32 bs_consumedBits = bs->consumedBits; + + /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */ + drflac_uint32 lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + /* + It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting + this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled + outside of this function at a higher level. + */ + extract_rice_param_part: + bs_cache <<= lzcount; + bs_consumedBits += lzcount; + + if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) { + /* Getting here means the rice parameter part is wholly contained within the current cache line. */ + bs_cache <<= riceParamPlus1; + bs_consumedBits += riceParamPlus1; + } else { + /* + Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache + line, reload the cache, and then combine it with the head of the next cache line. + */ + + /* Before reloading the cache we need to grab the size in bits of the low part. */ + drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits; + DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32); + + /* Now reload the cache. */ + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = riceParamPartLoBitCount; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. */ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount; + } + + bs_cache <<= riceParamPartLoBitCount; + } + } else { + /* + Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call + to drflac__clz() and we need to reload the cache. + */ + for (;;) { + if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) { + #ifndef DR_FLAC_NO_CRC + drflac__update_crc16(bs); + #endif + bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]); + bs_consumedBits = 0; + #ifndef DR_FLAC_NO_CRC + bs->crc16Cache = bs_cache; + #endif + } else { + /* Slow path. We need to fetch more data from the client. */ + if (!drflac__reload_cache(bs)) { + return DRFLAC_FALSE; + } + + bs_cache = bs->cache; + bs_consumedBits = bs->consumedBits; + } + + lzcount = drflac__clz(bs_cache); + if (lzcount < sizeof(bs_cache)*8) { + break; + } + } + + goto extract_rice_param_part; + } + + /* Make sure the cache is restored at the end of it all. */ + bs->cache = bs_cache; + bs->consumedBits = bs_consumedBits; + + return DRFLAC_TRUE; +} + + +static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorder(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + drflac_uint32 zeroCountPart0; + drflac_uint32 riceParamPart0; + drflac_uint32 riceParamMask; + drflac_uint32 i; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + DRFLAC_ASSERT(pSamplesOut != NULL); + + (void)bitsPerSample; + (void)order; + (void)shift; + (void)coefficients; + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + + i = 0; + while (i < count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamPart0 &= riceParamMask; + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + + pSamplesOut[i] = riceParamPart0; + + i += 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + drflac_uint32 zeroCountPart0 = 0; + drflac_uint32 zeroCountPart1 = 0; + drflac_uint32 zeroCountPart2 = 0; + drflac_uint32 zeroCountPart3 = 0; + drflac_uint32 riceParamPart0 = 0; + drflac_uint32 riceParamPart1 = 0; + drflac_uint32 riceParamPart2 = 0; + drflac_uint32 riceParamPart3 = 0; + drflac_uint32 riceParamMask; + const drflac_int32* pSamplesOutEnd; + drflac_uint32 i; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + DRFLAC_ASSERT(pSamplesOut != NULL); + + if (order == 0) { + return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + } + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + pSamplesOutEnd = pSamplesOut + (count & ~3); + + if (bitsPerSample+shift > 32) { + while (pSamplesOut < pSamplesOutEnd) { + /* + Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version + against an array. Not sure why, but perhaps it's making more efficient use of registers? + */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { + return DRFLAC_FALSE; + } + + riceParamPart0 &= riceParamMask; + riceParamPart1 &= riceParamMask; + riceParamPart2 &= riceParamMask; + riceParamPart3 &= riceParamMask; + + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart1 |= (zeroCountPart1 << riceParam); + riceParamPart2 |= (zeroCountPart2 << riceParam); + riceParamPart3 |= (zeroCountPart3 << riceParam); + + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; + riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; + riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; + + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); + pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1); + pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2); + pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3); + + pSamplesOut += 4; + } + } else { + while (pSamplesOut < pSamplesOutEnd) { + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) { + return DRFLAC_FALSE; + } + + riceParamPart0 &= riceParamMask; + riceParamPart1 &= riceParamMask; + riceParamPart2 &= riceParamMask; + riceParamPart3 &= riceParamMask; + + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart1 |= (zeroCountPart1 << riceParam); + riceParamPart2 |= (zeroCountPart2 << riceParam); + riceParamPart3 |= (zeroCountPart3 << riceParam); + + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01]; + riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01]; + riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01]; + + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); + pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1); + pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2); + pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3); + + pSamplesOut += 4; + } + } + + i = (count & ~3); + while (i < count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamPart0 &= riceParamMask; + riceParamPart0 |= (zeroCountPart0 << riceParam); + riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01]; + /*riceParamPart0 = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/ + + /* Sample reconstruction. */ + if (bitsPerSample+shift > 32) { + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0); + } else { + pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0); + } + + i += 1; + pSamplesOut += 1; + } + + return DRFLAC_TRUE; +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b) +{ + __m128i r; + + /* Pack. */ + r = _mm_packs_epi32(a, b); + + /* a3a2 a1a0 b3b2 b1b0 -> a3a2 b3b2 a1a0 b1b0 */ + r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0)); + + /* a3a2 b3b2 a1a0 b1b0 -> a3b3 a2b2 a1b1 a0b0 */ + r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0)); + r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0)); + + return r; +} +#endif + +#if defined(DRFLAC_SUPPORT_SSE41) +static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a) +{ + return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); +} + +static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x) +{ + __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2))); + __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2)); + return _mm_add_epi32(x64, x32); +} + +static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x) +{ + return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2))); +} + +static DRFLAC_INLINE __m128i drflac__mm_srai_epi64(__m128i x, int count) +{ + /* + To simplify this we are assuming count < 32. This restriction allows us to work on a low side and a high side. The low side + is shifted with zero bits, whereas the right side is shifted with sign bits. + */ + __m128i lo = _mm_srli_epi64(x, count); + __m128i hi = _mm_srai_epi32(x, count); + + hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0)); /* The high part needs to have the low part cleared. */ + + return _mm_or_si128(lo, hi); +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts0 = 0; + drflac_uint32 zeroCountParts1 = 0; + drflac_uint32 zeroCountParts2 = 0; + drflac_uint32 zeroCountParts3 = 0; + drflac_uint32 riceParamParts0 = 0; + drflac_uint32 riceParamParts1 = 0; + drflac_uint32 riceParamParts2 = 0; + drflac_uint32 riceParamParts3 = 0; + __m128i coefficients128_0; + __m128i coefficients128_4; + __m128i coefficients128_8; + __m128i samples128_0; + __m128i samples128_4; + __m128i samples128_8; + __m128i riceParamMask128; + + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + riceParamMask128 = _mm_set1_epi32(riceParamMask); + + /* Pre-load. */ + coefficients128_0 = _mm_setzero_si128(); + coefficients128_4 = _mm_setzero_si128(); + coefficients128_8 = _mm_setzero_si128(); + + samples128_0 = _mm_setzero_si128(); + samples128_4 = _mm_setzero_si128(); + samples128_8 = _mm_setzero_si128(); + + /* + Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than + what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results + in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted + so I think there's opportunity for this to be simplified. + */ +#if 1 + { + int runningOrder = order; + + /* 0 - 3. */ + if (runningOrder >= 4) { + coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0)); + samples128_0 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 4)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break; + case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0); break; + case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4)); + samples128_4 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 8)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break; + case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0); break; + case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8)); + samples128_8 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 12)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break; + case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0); break; + case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ + coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3)); + } +#else + /* This causes strict-aliasing warnings with GCC. */ + switch (order) + { + case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12]; + case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11]; + case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10]; + case 9: ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9]; + case 8: ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8]; + case 7: ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7]; + case 6: ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6]; + case 5: ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5]; + case 4: ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4]; + case 3: ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3]; + case 2: ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2]; + case 1: ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1]; + } +#endif + + /* For this version we are doing one sample at a time. */ + while (pDecodedSamples < pDecodedSamplesEnd) { + __m128i prediction128; + __m128i zeroCountPart128; + __m128i riceParamPart128; + + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) { + return DRFLAC_FALSE; + } + + zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0); + riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0); + + riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128); + riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam)); + riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01))); /* <-- SSE2 compatible */ + /*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01)), _mm_set1_epi32(0xFFFFFFFF)));*/ /* <-- Only supported from SSE4.1 and is slower in my testing... */ + + if (order <= 4) { + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0); + + /* Horizontal add and shift. */ + prediction128 = drflac__mm_hadd_epi32(prediction128); + prediction128 = _mm_srai_epi32(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); + + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); + } + } else if (order <= 8) { + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_mullo_epi32(coefficients128_4, samples128_4); + prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0)); + + /* Horizontal add and shift. */ + prediction128 = drflac__mm_hadd_epi32(prediction128); + prediction128 = _mm_srai_epi32(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); + + samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); + } + } else { + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_mullo_epi32(coefficients128_8, samples128_8); + prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4)); + prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0)); + + /* Horizontal add and shift. */ + prediction128 = drflac__mm_hadd_epi32(prediction128); + prediction128 = _mm_srai_epi32(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); + + samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4); + samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); + } + } + + /* We store samples in groups of 4. */ + _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0); + pDecodedSamples += 4; + } + + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamParts0 &= riceParamMask; + riceParamParts0 |= (zeroCountParts0 << riceParam); + riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01]; + + /* Sample reconstruction. */ + pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples); + + i += 1; + pDecodedSamples += 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts0 = 0; + drflac_uint32 zeroCountParts1 = 0; + drflac_uint32 zeroCountParts2 = 0; + drflac_uint32 zeroCountParts3 = 0; + drflac_uint32 riceParamParts0 = 0; + drflac_uint32 riceParamParts1 = 0; + drflac_uint32 riceParamParts2 = 0; + drflac_uint32 riceParamParts3 = 0; + __m128i coefficients128_0; + __m128i coefficients128_4; + __m128i coefficients128_8; + __m128i samples128_0; + __m128i samples128_4; + __m128i samples128_8; + __m128i prediction128; + __m128i riceParamMask128; + + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + + DRFLAC_ASSERT(order <= 12); + + riceParamMask = (drflac_uint32)~((~0UL) << riceParam); + riceParamMask128 = _mm_set1_epi32(riceParamMask); + + prediction128 = _mm_setzero_si128(); + + /* Pre-load. */ + coefficients128_0 = _mm_setzero_si128(); + coefficients128_4 = _mm_setzero_si128(); + coefficients128_8 = _mm_setzero_si128(); + + samples128_0 = _mm_setzero_si128(); + samples128_4 = _mm_setzero_si128(); + samples128_8 = _mm_setzero_si128(); + +#if 1 + { + int runningOrder = order; + + /* 0 - 3. */ + if (runningOrder >= 4) { + coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0)); + samples128_0 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 4)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break; + case 2: coefficients128_0 = _mm_set_epi32(0, 0, coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0, 0); break; + case 1: coefficients128_0 = _mm_set_epi32(0, 0, 0, coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4)); + samples128_4 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 8)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break; + case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0); break; + case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8)); + samples128_8 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 12)); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break; + case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0); break; + case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0); break; + } + runningOrder = 0; + } + + /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ + coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3)); + coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3)); + } +#else + switch (order) + { + case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12]; + case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11]; + case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10]; + case 9: ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9]; + case 8: ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8]; + case 7: ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7]; + case 6: ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6]; + case 5: ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5]; + case 4: ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4]; + case 3: ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3]; + case 2: ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2]; + case 1: ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1]; + } +#endif + + /* For this version we are doing one sample at a time. */ + while (pDecodedSamples < pDecodedSamplesEnd) { + __m128i zeroCountPart128; + __m128i riceParamPart128; + + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) { + return DRFLAC_FALSE; + } + + zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0); + riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0); + + riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128); + riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam)); + riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1))); + + for (i = 0; i < 4; i += 1) { + prediction128 = _mm_xor_si128(prediction128, prediction128); /* Reset to 0. */ + + switch (order) + { + case 12: + case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0)))); + case 10: + case 9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2)))); + case 8: + case 7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0)))); + case 6: + case 5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2)))); + case 4: + case 3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0)))); + case 2: + case 1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2)))); + } + + /* Horizontal add and shift. */ + prediction128 = drflac__mm_hadd_epi64(prediction128); + prediction128 = drflac__mm_srai_epi64(prediction128, shift); + prediction128 = _mm_add_epi32(riceParamPart128, prediction128); + + /* Our value should be sitting in prediction128[0]. We need to combine this with our SSE samples. */ + samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4); + samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4); + samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4); + + /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */ + riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4); + } + + /* We store samples in groups of 4. */ + _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0); + pDecodedSamples += 4; + } + + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamParts0 &= riceParamMask; + riceParamParts0 |= (zeroCountParts0 << riceParam); + riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01]; + + /* Sample reconstruction. */ + pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples); + + i += 1; + pDecodedSamples += 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + DRFLAC_ASSERT(pSamplesOut != NULL); + + /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */ + if (order > 0 && order <= 12) { + if (bitsPerSample+shift > 32) { + return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut); + } else { + return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut); + } + } else { + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac__vst2q_s32(drflac_int32* p, int32x4x2_t x) +{ + vst1q_s32(p+0, x.val[0]); + vst1q_s32(p+4, x.val[1]); +} + +static DRFLAC_INLINE void drflac__vst2q_u32(drflac_uint32* p, uint32x4x2_t x) +{ + vst1q_u32(p+0, x.val[0]); + vst1q_u32(p+4, x.val[1]); +} + +static DRFLAC_INLINE void drflac__vst2q_f32(float* p, float32x4x2_t x) +{ + vst1q_f32(p+0, x.val[0]); + vst1q_f32(p+4, x.val[1]); +} + +static DRFLAC_INLINE void drflac__vst2q_s16(drflac_int16* p, int16x4x2_t x) +{ + vst1q_s16(p, vcombine_s16(x.val[0], x.val[1])); +} + +static DRFLAC_INLINE void drflac__vst2q_u16(drflac_uint16* p, uint16x4x2_t x) +{ + vst1q_u16(p, vcombine_u16(x.val[0], x.val[1])); +} + +static DRFLAC_INLINE int32x4_t drflac__vdupq_n_s32x4(drflac_int32 x3, drflac_int32 x2, drflac_int32 x1, drflac_int32 x0) +{ + drflac_int32 x[4]; + x[3] = x3; + x[2] = x2; + x[1] = x1; + x[0] = x0; + return vld1q_s32(x); +} + +static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b) +{ + /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */ + + /* Reference */ + /*return drflac__vdupq_n_s32x4( + vgetq_lane_s32(a, 0), + vgetq_lane_s32(b, 3), + vgetq_lane_s32(b, 2), + vgetq_lane_s32(b, 1) + );*/ + + return vextq_s32(b, a, 1); +} + +static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b) +{ + /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */ + + /* Reference */ + /*return drflac__vdupq_n_s32x4( + vgetq_lane_s32(a, 0), + vgetq_lane_s32(b, 3), + vgetq_lane_s32(b, 2), + vgetq_lane_s32(b, 1) + );*/ + + return vextq_u32(b, a, 1); +} + +static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x) +{ + /* The sum must end up in position 0. */ + + /* Reference */ + /*return vdupq_n_s32( + vgetq_lane_s32(x, 3) + + vgetq_lane_s32(x, 2) + + vgetq_lane_s32(x, 1) + + vgetq_lane_s32(x, 0) + );*/ + + int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x)); + return vpadd_s32(r, r); +} + +static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x) +{ + return vadd_s64(vget_high_s64(x), vget_low_s64(x)); +} + +static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x) +{ + /* Reference */ + /*return drflac__vdupq_n_s32x4( + vgetq_lane_s32(x, 0), + vgetq_lane_s32(x, 1), + vgetq_lane_s32(x, 2), + vgetq_lane_s32(x, 3) + );*/ + + return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x))); +} + +static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x) +{ + return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF)); +} + +static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x) +{ + return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF)); +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts[4]; + drflac_uint32 riceParamParts[4]; + int32x4_t coefficients128_0; + int32x4_t coefficients128_4; + int32x4_t coefficients128_8; + int32x4_t samples128_0; + int32x4_t samples128_4; + int32x4_t samples128_8; + uint32x4_t riceParamMask128; + int32x4_t riceParam128; + int32x2_t shift64; + uint32x4_t one128; + + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + + riceParamMask = ~((~0UL) << riceParam); + riceParamMask128 = vdupq_n_u32(riceParamMask); + + riceParam128 = vdupq_n_s32(riceParam); + shift64 = vdup_n_s32(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */ + one128 = vdupq_n_u32(1); + + /* + Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than + what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results + in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted + so I think there's opportunity for this to be simplified. + */ + { + int runningOrder = order; + drflac_int32 tempC[4] = {0, 0, 0, 0}; + drflac_int32 tempS[4] = {0, 0, 0, 0}; + + /* 0 - 3. */ + if (runningOrder >= 4) { + coefficients128_0 = vld1q_s32(coefficients + 0); + samples128_0 = vld1q_s32(pSamplesOut - 4); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */ + case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */ + case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */ + } + + coefficients128_0 = vld1q_s32(tempC); + samples128_0 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = vld1q_s32(coefficients + 4); + samples128_4 = vld1q_s32(pSamplesOut - 8); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */ + case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */ + case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */ + } + + coefficients128_4 = vld1q_s32(tempC); + samples128_4 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = vld1q_s32(coefficients + 8); + samples128_8 = vld1q_s32(pSamplesOut - 12); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */ + case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */ + case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */ + } + + coefficients128_8 = vld1q_s32(tempC); + samples128_8 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ + coefficients128_0 = drflac__vrevq_s32(coefficients128_0); + coefficients128_4 = drflac__vrevq_s32(coefficients128_4); + coefficients128_8 = drflac__vrevq_s32(coefficients128_8); + } + + /* For this version we are doing one sample at a time. */ + while (pDecodedSamples < pDecodedSamplesEnd) { + int32x4_t prediction128; + int32x2_t prediction64; + uint32x4_t zeroCountPart128; + uint32x4_t riceParamPart128; + + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) { + return DRFLAC_FALSE; + } + + zeroCountPart128 = vld1q_u32(zeroCountParts); + riceParamPart128 = vld1q_u32(riceParamParts); + + riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128); + riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128)); + riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128)); + + if (order <= 4) { + for (i = 0; i < 4; i += 1) { + prediction128 = vmulq_s32(coefficients128_0, samples128_0); + + /* Horizontal add and shift. */ + prediction64 = drflac__vhaddq_s32(prediction128); + prediction64 = vshl_s32(prediction64, shift64); + prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); + + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); + } + } else if (order <= 8) { + for (i = 0; i < 4; i += 1) { + prediction128 = vmulq_s32(coefficients128_4, samples128_4); + prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0); + + /* Horizontal add and shift. */ + prediction64 = drflac__vhaddq_s32(prediction128); + prediction64 = vshl_s32(prediction64, shift64); + prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); + + samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); + } + } else { + for (i = 0; i < 4; i += 1) { + prediction128 = vmulq_s32(coefficients128_8, samples128_8); + prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4); + prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0); + + /* Horizontal add and shift. */ + prediction64 = drflac__vhaddq_s32(prediction128); + prediction64 = vshl_s32(prediction64, shift64); + prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128))); + + samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8); + samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0); + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); + } + } + + /* We store samples in groups of 4. */ + vst1q_s32(pDecodedSamples, samples128_0); + pDecodedSamples += 4; + } + + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamParts[0] &= riceParamMask; + riceParamParts[0] |= (zeroCountParts[0] << riceParam); + riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01]; + + /* Sample reconstruction. */ + pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples); + + i += 1; + pDecodedSamples += 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + int i; + drflac_uint32 riceParamMask; + drflac_int32* pDecodedSamples = pSamplesOut; + drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3); + drflac_uint32 zeroCountParts[4]; + drflac_uint32 riceParamParts[4]; + int32x4_t coefficients128_0; + int32x4_t coefficients128_4; + int32x4_t coefficients128_8; + int32x4_t samples128_0; + int32x4_t samples128_4; + int32x4_t samples128_8; + uint32x4_t riceParamMask128; + int32x4_t riceParam128; + int64x1_t shift64; + uint32x4_t one128; + + const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF}; + + riceParamMask = ~((~0UL) << riceParam); + riceParamMask128 = vdupq_n_u32(riceParamMask); + + riceParam128 = vdupq_n_s32(riceParam); + shift64 = vdup_n_s64(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */ + one128 = vdupq_n_u32(1); + + /* + Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than + what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results + in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted + so I think there's opportunity for this to be simplified. + */ + { + int runningOrder = order; + drflac_int32 tempC[4] = {0, 0, 0, 0}; + drflac_int32 tempS[4] = {0, 0, 0, 0}; + + /* 0 - 3. */ + if (runningOrder >= 4) { + coefficients128_0 = vld1q_s32(coefficients + 0); + samples128_0 = vld1q_s32(pSamplesOut - 4); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */ + case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */ + case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */ + } + + coefficients128_0 = vld1q_s32(tempC); + samples128_0 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* 4 - 7 */ + if (runningOrder >= 4) { + coefficients128_4 = vld1q_s32(coefficients + 4); + samples128_4 = vld1q_s32(pSamplesOut - 8); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */ + case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */ + case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */ + } + + coefficients128_4 = vld1q_s32(tempC); + samples128_4 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* 8 - 11 */ + if (runningOrder == 4) { + coefficients128_8 = vld1q_s32(coefficients + 8); + samples128_8 = vld1q_s32(pSamplesOut - 12); + runningOrder -= 4; + } else { + switch (runningOrder) { + case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */ + case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */ + case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */ + } + + coefficients128_8 = vld1q_s32(tempC); + samples128_8 = vld1q_s32(tempS); + runningOrder = 0; + } + + /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */ + coefficients128_0 = drflac__vrevq_s32(coefficients128_0); + coefficients128_4 = drflac__vrevq_s32(coefficients128_4); + coefficients128_8 = drflac__vrevq_s32(coefficients128_8); + } + + /* For this version we are doing one sample at a time. */ + while (pDecodedSamples < pDecodedSamplesEnd) { + int64x2_t prediction128; + uint32x4_t zeroCountPart128; + uint32x4_t riceParamPart128; + + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) || + !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) { + return DRFLAC_FALSE; + } + + zeroCountPart128 = vld1q_u32(zeroCountParts); + riceParamPart128 = vld1q_u32(riceParamParts); + + riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128); + riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128)); + riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128)); + + for (i = 0; i < 4; i += 1) { + int64x1_t prediction64; + + prediction128 = veorq_s64(prediction128, prediction128); /* Reset to 0. */ + switch (order) + { + case 12: + case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8))); + case 10: + case 9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8))); + case 8: + case 7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4))); + case 6: + case 5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4))); + case 4: + case 3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0))); + case 2: + case 1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0))); + } + + /* Horizontal add and shift. */ + prediction64 = drflac__vhaddq_s64(prediction128); + prediction64 = vshl_s64(prediction64, shift64); + prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0))); + + /* Our value should be sitting in prediction64[0]. We need to combine this with our SSE samples. */ + samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8); + samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4); + samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0); + + /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */ + riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128); + } + + /* We store samples in groups of 4. */ + vst1q_s32(pDecodedSamples, samples128_0); + pDecodedSamples += 4; + } + + /* Make sure we process the last few samples. */ + i = (count & ~3); + while (i < (int)count) { + /* Rice extraction. */ + if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) { + return DRFLAC_FALSE; + } + + /* Rice reconstruction. */ + riceParamParts[0] &= riceParamMask; + riceParamParts[0] |= (zeroCountParts[0] << riceParam); + riceParamParts[0] = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01]; + + /* Sample reconstruction. */ + pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples); + + i += 1; + pDecodedSamples += 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + DRFLAC_ASSERT(pSamplesOut != NULL); + + /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */ + if (order > 0 && order <= 12) { + if (bitsPerSample+shift > 32) { + return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut); + } else { + return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut); + } + } else { + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + } +} +#endif + +static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ +#if defined(DRFLAC_SUPPORT_SSE41) + if (drflac__gIsSSE41Supported) { + return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported) { + return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + } else +#endif + { + /* Scalar fallback. */ + return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut); + } +} + +/* Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. */ +static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam) +{ + drflac_uint32 i; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + + for (i = 0; i < count; ++i) { + if (!drflac__seek_rice_parts(bs, riceParam)) { + return DRFLAC_FALSE; + } + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut) +{ + drflac_uint32 i; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(count > 0); + DRFLAC_ASSERT(unencodedBitsPerSample <= 31); /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */ + DRFLAC_ASSERT(pSamplesOut != NULL); + + for (i = 0; i < count; ++i) { + if (unencodedBitsPerSample > 0) { + if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) { + return DRFLAC_FALSE; + } + } else { + pSamplesOut[i] = 0; + } + + if (bitsPerSample >= 24) { + pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i); + } else { + pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i); + } + } + + return DRFLAC_TRUE; +} + + +/* +Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called +when the decoder is sitting at the very start of the RESIDUAL block. The first residuals will be ignored. The + and parameters are used to determine how many residual values need to be decoded. +*/ +static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples) +{ + drflac_uint8 residualMethod; + drflac_uint8 partitionOrder; + drflac_uint32 samplesInPartition; + drflac_uint32 partitionsRemaining; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(blockSize != 0); + DRFLAC_ASSERT(pDecodedSamples != NULL); /* <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? */ + + if (!drflac__read_uint8(bs, 2, &residualMethod)) { + return DRFLAC_FALSE; + } + + if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + return DRFLAC_FALSE; /* Unknown or unsupported residual coding method. */ + } + + /* Ignore the first values. */ + pDecodedSamples += order; + + if (!drflac__read_uint8(bs, 4, &partitionOrder)) { + return DRFLAC_FALSE; + } + + /* + From the FLAC spec: + The Rice partition order in a Rice-coded residual section must be less than or equal to 8. + */ + if (partitionOrder > 8) { + return DRFLAC_FALSE; + } + + /* Validation check. */ + if ((blockSize / (1 << partitionOrder)) <= order) { + return DRFLAC_FALSE; + } + + samplesInPartition = (blockSize / (1 << partitionOrder)) - order; + partitionsRemaining = (1 << partitionOrder); + for (;;) { + drflac_uint8 riceParam = 0; + if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) { + if (!drflac__read_uint8(bs, 4, &riceParam)) { + return DRFLAC_FALSE; + } + if (riceParam == 15) { + riceParam = 0xFF; + } + } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + if (!drflac__read_uint8(bs, 5, &riceParam)) { + return DRFLAC_FALSE; + } + if (riceParam == 31) { + riceParam = 0xFF; + } + } + + if (riceParam != 0xFF) { + if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) { + return DRFLAC_FALSE; + } + } else { + drflac_uint8 unencodedBitsPerSample = 0; + if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) { + return DRFLAC_FALSE; + } + + if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) { + return DRFLAC_FALSE; + } + } + + pDecodedSamples += samplesInPartition; + + if (partitionsRemaining == 1) { + break; + } + + partitionsRemaining -= 1; + + if (partitionOrder != 0) { + samplesInPartition = blockSize / (1 << partitionOrder); + } + } + + return DRFLAC_TRUE; +} + +/* +Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called +when the decoder is sitting at the very start of the RESIDUAL block. The first residuals will be set to 0. The + and parameters are used to determine how many residual values need to be decoded. +*/ +static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order) +{ + drflac_uint8 residualMethod; + drflac_uint8 partitionOrder; + drflac_uint32 samplesInPartition; + drflac_uint32 partitionsRemaining; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(blockSize != 0); + + if (!drflac__read_uint8(bs, 2, &residualMethod)) { + return DRFLAC_FALSE; + } + + if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + return DRFLAC_FALSE; /* Unknown or unsupported residual coding method. */ + } + + if (!drflac__read_uint8(bs, 4, &partitionOrder)) { + return DRFLAC_FALSE; + } + + /* + From the FLAC spec: + The Rice partition order in a Rice-coded residual section must be less than or equal to 8. + */ + if (partitionOrder > 8) { + return DRFLAC_FALSE; + } + + /* Validation check. */ + if ((blockSize / (1 << partitionOrder)) <= order) { + return DRFLAC_FALSE; + } + + samplesInPartition = (blockSize / (1 << partitionOrder)) - order; + partitionsRemaining = (1 << partitionOrder); + for (;;) + { + drflac_uint8 riceParam = 0; + if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) { + if (!drflac__read_uint8(bs, 4, &riceParam)) { + return DRFLAC_FALSE; + } + if (riceParam == 15) { + riceParam = 0xFF; + } + } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) { + if (!drflac__read_uint8(bs, 5, &riceParam)) { + return DRFLAC_FALSE; + } + if (riceParam == 31) { + riceParam = 0xFF; + } + } + + if (riceParam != 0xFF) { + if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) { + return DRFLAC_FALSE; + } + } else { + drflac_uint8 unencodedBitsPerSample = 0; + if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) { + return DRFLAC_FALSE; + } + + if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) { + return DRFLAC_FALSE; + } + } + + + if (partitionsRemaining == 1) { + break; + } + + partitionsRemaining -= 1; + samplesInPartition = blockSize / (1 << partitionOrder); + } + + return DRFLAC_TRUE; +} + + +static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples) +{ + drflac_uint32 i; + + /* Only a single sample needs to be decoded here. */ + drflac_int32 sample; + if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { + return DRFLAC_FALSE; + } + + /* + We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely) + we'll want to look at a more efficient way. + */ + for (i = 0; i < blockSize; ++i) { + pDecodedSamples[i] = sample; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples) +{ + drflac_uint32 i; + + for (i = 0; i < blockSize; ++i) { + drflac_int32 sample; + if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { + return DRFLAC_FALSE; + } + + pDecodedSamples[i] = sample; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples) +{ + drflac_uint32 i; + + static drflac_int32 lpcCoefficientsTable[5][4] = { + {0, 0, 0, 0}, + {1, 0, 0, 0}, + {2, -1, 0, 0}, + {3, -3, 1, 0}, + {4, -6, 4, -1} + }; + + /* Warm up samples and coefficients. */ + for (i = 0; i < lpcOrder; ++i) { + drflac_int32 sample; + if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) { + return DRFLAC_FALSE; + } + + pDecodedSamples[i] = sample; + } + + if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) { + return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples) +{ + drflac_uint8 i; + drflac_uint8 lpcPrecision; + drflac_int8 lpcShift; + drflac_int32 coefficients[32]; + + /* Warm up samples. */ + for (i = 0; i < lpcOrder; ++i) { + drflac_int32 sample; + if (!drflac__read_int32(bs, bitsPerSample, &sample)) { + return DRFLAC_FALSE; + } + + pDecodedSamples[i] = sample; + } + + if (!drflac__read_uint8(bs, 4, &lpcPrecision)) { + return DRFLAC_FALSE; + } + if (lpcPrecision == 15) { + return DRFLAC_FALSE; /* Invalid. */ + } + lpcPrecision += 1; + + if (!drflac__read_int8(bs, 5, &lpcShift)) { + return DRFLAC_FALSE; + } + + /* + From the FLAC specification: + + Quantized linear predictor coefficient shift needed in bits (NOTE: this number is signed two's-complement) + + Emphasis on the "signed two's-complement". In practice there does not seem to be any encoders nor decoders supporting negative shifts. For now dr_flac is + not going to support negative shifts as I don't have any reference files. However, when a reference file comes through I will consider adding support. + */ + if (lpcShift < 0) { + return DRFLAC_FALSE; + } + + DRFLAC_ZERO_MEMORY(coefficients, sizeof(coefficients)); + for (i = 0; i < lpcOrder; ++i) { + if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) { + return DRFLAC_FALSE; + } + } + + if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) { + return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; +} + + +static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header) +{ + const drflac_uint32 sampleRateTable[12] = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000}; + const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1}; /* -1 = reserved. */ + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(header != NULL); + + /* Keep looping until we find a valid sync code. */ + for (;;) { + drflac_uint8 crc8 = 0xCE; /* 0xCE = drflac_crc8(0, 0x3FFE, 14); */ + drflac_uint8 reserved = 0; + drflac_uint8 blockingStrategy = 0; + drflac_uint8 blockSize = 0; + drflac_uint8 sampleRate = 0; + drflac_uint8 channelAssignment = 0; + drflac_uint8 bitsPerSample = 0; + drflac_bool32 isVariableBlockSize; + + if (!drflac__find_and_seek_to_next_sync_code(bs)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_uint8(bs, 1, &reserved)) { + return DRFLAC_FALSE; + } + if (reserved == 1) { + continue; + } + crc8 = drflac_crc8(crc8, reserved, 1); + + if (!drflac__read_uint8(bs, 1, &blockingStrategy)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, blockingStrategy, 1); + + if (!drflac__read_uint8(bs, 4, &blockSize)) { + return DRFLAC_FALSE; + } + if (blockSize == 0) { + continue; + } + crc8 = drflac_crc8(crc8, blockSize, 4); + + if (!drflac__read_uint8(bs, 4, &sampleRate)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, sampleRate, 4); + + if (!drflac__read_uint8(bs, 4, &channelAssignment)) { + return DRFLAC_FALSE; + } + if (channelAssignment > 10) { + continue; + } + crc8 = drflac_crc8(crc8, channelAssignment, 4); + + if (!drflac__read_uint8(bs, 3, &bitsPerSample)) { + return DRFLAC_FALSE; + } + if (bitsPerSample == 3 || bitsPerSample == 7) { + continue; + } + crc8 = drflac_crc8(crc8, bitsPerSample, 3); + + + if (!drflac__read_uint8(bs, 1, &reserved)) { + return DRFLAC_FALSE; + } + if (reserved == 1) { + continue; + } + crc8 = drflac_crc8(crc8, reserved, 1); + + + isVariableBlockSize = blockingStrategy == 1; + if (isVariableBlockSize) { + drflac_uint64 pcmFrameNumber; + drflac_result result = drflac__read_utf8_coded_number(bs, &pcmFrameNumber, &crc8); + if (result != DRFLAC_SUCCESS) { + if (result == DRFLAC_AT_END) { + return DRFLAC_FALSE; + } else { + continue; + } + } + header->flacFrameNumber = 0; + header->pcmFrameNumber = pcmFrameNumber; + } else { + drflac_uint64 flacFrameNumber = 0; + drflac_result result = drflac__read_utf8_coded_number(bs, &flacFrameNumber, &crc8); + if (result != DRFLAC_SUCCESS) { + if (result == DRFLAC_AT_END) { + return DRFLAC_FALSE; + } else { + continue; + } + } + header->flacFrameNumber = (drflac_uint32)flacFrameNumber; /* <-- Safe cast. */ + header->pcmFrameNumber = 0; + } + + + DRFLAC_ASSERT(blockSize > 0); + if (blockSize == 1) { + header->blockSizeInPCMFrames = 192; + } else if (blockSize <= 5) { + DRFLAC_ASSERT(blockSize >= 2); + header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2)); + } else if (blockSize == 6) { + if (!drflac__read_uint16(bs, 8, &header->blockSizeInPCMFrames)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 8); + header->blockSizeInPCMFrames += 1; + } else if (blockSize == 7) { + if (!drflac__read_uint16(bs, 16, &header->blockSizeInPCMFrames)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16); + header->blockSizeInPCMFrames += 1; + } else { + DRFLAC_ASSERT(blockSize >= 8); + header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8)); + } + + + if (sampleRate <= 11) { + header->sampleRate = sampleRateTable[sampleRate]; + } else if (sampleRate == 12) { + if (!drflac__read_uint32(bs, 8, &header->sampleRate)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->sampleRate, 8); + header->sampleRate *= 1000; + } else if (sampleRate == 13) { + if (!drflac__read_uint32(bs, 16, &header->sampleRate)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->sampleRate, 16); + } else if (sampleRate == 14) { + if (!drflac__read_uint32(bs, 16, &header->sampleRate)) { + return DRFLAC_FALSE; + } + crc8 = drflac_crc8(crc8, header->sampleRate, 16); + header->sampleRate *= 10; + } else { + continue; /* Invalid. Assume an invalid block. */ + } + + + header->channelAssignment = channelAssignment; + + header->bitsPerSample = bitsPerSampleTable[bitsPerSample]; + if (header->bitsPerSample == 0) { + header->bitsPerSample = streaminfoBitsPerSample; + } + + if (!drflac__read_uint8(bs, 8, &header->crc8)) { + return DRFLAC_FALSE; + } + +#ifndef DR_FLAC_NO_CRC + if (header->crc8 != crc8) { + continue; /* CRC mismatch. Loop back to the top and find the next sync code. */ + } +#endif + return DRFLAC_TRUE; + } +} + +static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe) +{ + drflac_uint8 header; + int type; + + if (!drflac__read_uint8(bs, 8, &header)) { + return DRFLAC_FALSE; + } + + /* First bit should always be 0. */ + if ((header & 0x80) != 0) { + return DRFLAC_FALSE; + } + + type = (header & 0x7E) >> 1; + if (type == 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT; + } else if (type == 1) { + pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM; + } else { + if ((type & 0x20) != 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_LPC; + pSubframe->lpcOrder = (drflac_uint8)(type & 0x1F) + 1; + } else if ((type & 0x08) != 0) { + pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED; + pSubframe->lpcOrder = (drflac_uint8)(type & 0x07); + if (pSubframe->lpcOrder > 4) { + pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; + pSubframe->lpcOrder = 0; + } + } else { + pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED; + } + } + + if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) { + return DRFLAC_FALSE; + } + + /* Wasted bits per sample. */ + pSubframe->wastedBitsPerSample = 0; + if ((header & 0x01) == 1) { + unsigned int wastedBitsPerSample; + if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) { + return DRFLAC_FALSE; + } + pSubframe->wastedBitsPerSample = (drflac_uint8)wastedBitsPerSample + 1; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut) +{ + drflac_subframe* pSubframe; + drflac_uint32 subframeBitsPerSample; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(frame != NULL); + + pSubframe = frame->subframes + subframeIndex; + if (!drflac__read_subframe_header(bs, pSubframe)) { + return DRFLAC_FALSE; + } + + /* Side channels require an extra bit per sample. Took a while to figure that one out... */ + subframeBitsPerSample = frame->header.bitsPerSample; + if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { + subframeBitsPerSample += 1; + } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { + subframeBitsPerSample += 1; + } + + /* Need to handle wasted bits per sample. */ + if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) { + return DRFLAC_FALSE; + } + subframeBitsPerSample -= pSubframe->wastedBitsPerSample; + + pSubframe->pSamplesS32 = pDecodedSamplesOut; + + switch (pSubframe->subframeType) + { + case DRFLAC_SUBFRAME_CONSTANT: + { + drflac__decode_samples__constant(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32); + } break; + + case DRFLAC_SUBFRAME_VERBATIM: + { + drflac__decode_samples__verbatim(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32); + } break; + + case DRFLAC_SUBFRAME_FIXED: + { + drflac__decode_samples__fixed(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32); + } break; + + case DRFLAC_SUBFRAME_LPC: + { + drflac__decode_samples__lpc(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32); + } break; + + default: return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex) +{ + drflac_subframe* pSubframe; + drflac_uint32 subframeBitsPerSample; + + DRFLAC_ASSERT(bs != NULL); + DRFLAC_ASSERT(frame != NULL); + + pSubframe = frame->subframes + subframeIndex; + if (!drflac__read_subframe_header(bs, pSubframe)) { + return DRFLAC_FALSE; + } + + /* Side channels require an extra bit per sample. Took a while to figure that one out... */ + subframeBitsPerSample = frame->header.bitsPerSample; + if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) { + subframeBitsPerSample += 1; + } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) { + subframeBitsPerSample += 1; + } + + /* Need to handle wasted bits per sample. */ + if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) { + return DRFLAC_FALSE; + } + subframeBitsPerSample -= pSubframe->wastedBitsPerSample; + + pSubframe->pSamplesS32 = NULL; + + switch (pSubframe->subframeType) + { + case DRFLAC_SUBFRAME_CONSTANT: + { + if (!drflac__seek_bits(bs, subframeBitsPerSample)) { + return DRFLAC_FALSE; + } + } break; + + case DRFLAC_SUBFRAME_VERBATIM: + { + unsigned int bitsToSeek = frame->header.blockSizeInPCMFrames * subframeBitsPerSample; + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + } break; + + case DRFLAC_SUBFRAME_FIXED: + { + unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample; + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) { + return DRFLAC_FALSE; + } + } break; + + case DRFLAC_SUBFRAME_LPC: + { + drflac_uint8 lpcPrecision; + + unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample; + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_uint8(bs, 4, &lpcPrecision)) { + return DRFLAC_FALSE; + } + if (lpcPrecision == 15) { + return DRFLAC_FALSE; /* Invalid. */ + } + lpcPrecision += 1; + + + bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5; /* +5 for shift. */ + if (!drflac__seek_bits(bs, bitsToSeek)) { + return DRFLAC_FALSE; + } + + if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) { + return DRFLAC_FALSE; + } + } break; + + default: return DRFLAC_FALSE; + } + + return DRFLAC_TRUE; +} + + +static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment) +{ + drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2}; + + DRFLAC_ASSERT(channelAssignment <= 10); + return lookup[channelAssignment]; +} + +static drflac_result drflac__decode_flac_frame(drflac* pFlac) +{ + int channelCount; + int i; + drflac_uint8 paddingSizeInBits; + drflac_uint16 desiredCRC16; +#ifndef DR_FLAC_NO_CRC + drflac_uint16 actualCRC16; +#endif + + /* This function should be called while the stream is sitting on the first byte after the frame header. */ + DRFLAC_ZERO_MEMORY(pFlac->currentFLACFrame.subframes, sizeof(pFlac->currentFLACFrame.subframes)); + + /* The frame block size must never be larger than the maximum block size defined by the FLAC stream. */ + if (pFlac->currentFLACFrame.header.blockSizeInPCMFrames > pFlac->maxBlockSizeInPCMFrames) { + return DRFLAC_ERROR; + } + + /* The number of channels in the frame must match the channel count from the STREAMINFO block. */ + channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + if (channelCount != (int)pFlac->channels) { + return DRFLAC_ERROR; + } + + for (i = 0; i < channelCount; ++i) { + if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i, pFlac->pDecodedSamples + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames * i))) { + return DRFLAC_ERROR; + } + } + + paddingSizeInBits = (drflac_uint8)(DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7); + if (paddingSizeInBits > 0) { + drflac_uint8 padding = 0; + if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) { + return DRFLAC_AT_END; + } + } + +#ifndef DR_FLAC_NO_CRC + actualCRC16 = drflac__flush_crc16(&pFlac->bs); +#endif + if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) { + return DRFLAC_AT_END; + } + +#ifndef DR_FLAC_NO_CRC + if (actualCRC16 != desiredCRC16) { + return DRFLAC_CRC_MISMATCH; /* CRC mismatch. */ + } +#endif + + pFlac->currentFLACFrame.pcmFramesRemaining = pFlac->currentFLACFrame.header.blockSizeInPCMFrames; + + return DRFLAC_SUCCESS; +} + +static drflac_result drflac__seek_flac_frame(drflac* pFlac) +{ + int channelCount; + int i; + drflac_uint16 desiredCRC16; +#ifndef DR_FLAC_NO_CRC + drflac_uint16 actualCRC16; +#endif + + channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + for (i = 0; i < channelCount; ++i) { + if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i)) { + return DRFLAC_ERROR; + } + } + + /* Padding. */ + if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) { + return DRFLAC_ERROR; + } + + /* CRC. */ +#ifndef DR_FLAC_NO_CRC + actualCRC16 = drflac__flush_crc16(&pFlac->bs); +#endif + if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) { + return DRFLAC_AT_END; + } + +#ifndef DR_FLAC_NO_CRC + if (actualCRC16 != desiredCRC16) { + return DRFLAC_CRC_MISMATCH; /* CRC mismatch. */ + } +#endif + + return DRFLAC_SUCCESS; +} + +static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac) +{ + DRFLAC_ASSERT(pFlac != NULL); + + for (;;) { + drflac_result result; + + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + + result = drflac__decode_flac_frame(pFlac); + if (result != DRFLAC_SUCCESS) { + if (result == DRFLAC_CRC_MISMATCH) { + continue; /* CRC mismatch. Skip to the next frame. */ + } else { + return DRFLAC_FALSE; + } + } + + return DRFLAC_TRUE; + } +} + +static void drflac__get_pcm_frame_range_of_current_flac_frame(drflac* pFlac, drflac_uint64* pFirstPCMFrame, drflac_uint64* pLastPCMFrame) +{ + drflac_uint64 firstPCMFrame; + drflac_uint64 lastPCMFrame; + + DRFLAC_ASSERT(pFlac != NULL); + + firstPCMFrame = pFlac->currentFLACFrame.header.pcmFrameNumber; + if (firstPCMFrame == 0) { + firstPCMFrame = ((drflac_uint64)pFlac->currentFLACFrame.header.flacFrameNumber) * pFlac->maxBlockSizeInPCMFrames; + } + + lastPCMFrame = firstPCMFrame + pFlac->currentFLACFrame.header.blockSizeInPCMFrames; + if (lastPCMFrame > 0) { + lastPCMFrame -= 1; /* Needs to be zero based. */ + } + + if (pFirstPCMFrame) { + *pFirstPCMFrame = firstPCMFrame; + } + if (pLastPCMFrame) { + *pLastPCMFrame = lastPCMFrame; + } +} + +static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac) +{ + drflac_bool32 result; + + DRFLAC_ASSERT(pFlac != NULL); + + result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes); + + DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame)); + pFlac->currentPCMFrame = 0; + + return result; +} + +static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac) +{ + /* This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. */ + DRFLAC_ASSERT(pFlac != NULL); + return drflac__seek_flac_frame(pFlac); +} + + +static drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek) +{ + drflac_uint64 pcmFramesRead = 0; + while (pcmFramesToSeek > 0) { + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. */ + } + } else { + if (pFlac->currentFLACFrame.pcmFramesRemaining > pcmFramesToSeek) { + pcmFramesRead += pcmFramesToSeek; + pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)pcmFramesToSeek; /* <-- Safe cast. Will always be < currentFrame.pcmFramesRemaining < 65536. */ + pcmFramesToSeek = 0; + } else { + pcmFramesRead += pFlac->currentFLACFrame.pcmFramesRemaining; + pcmFramesToSeek -= pFlac->currentFLACFrame.pcmFramesRemaining; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + } + } + } + + pFlac->currentPCMFrame += pcmFramesRead; + return pcmFramesRead; +} + + +static drflac_bool32 drflac__seek_to_pcm_frame__brute_force(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_bool32 isMidFrame = DRFLAC_FALSE; + drflac_uint64 runningPCMFrameCount; + + DRFLAC_ASSERT(pFlac != NULL); + + /* If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. */ + if (pcmFrameIndex >= pFlac->currentPCMFrame) { + /* Seeking forward. Need to seek from the current position. */ + runningPCMFrameCount = pFlac->currentPCMFrame; + + /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */ + if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } else { + isMidFrame = DRFLAC_TRUE; + } + } else { + /* Seeking backwards. Need to seek from the start of the file. */ + runningPCMFrameCount = 0; + + /* Move back to the start. */ + if (!drflac__seek_to_first_frame(pFlac)) { + return DRFLAC_FALSE; + } + + /* Decode the first frame in preparation for sample-exact seeking below. */ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } + + /* + We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its + header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame. + */ + for (;;) { + drflac_uint64 pcmFrameCountInThisFLACFrame; + drflac_uint64 firstPCMFrameInFLACFrame = 0; + drflac_uint64 lastPCMFrameInFLACFrame = 0; + + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); + + pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; + if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) { + /* + The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend + it never existed and keep iterating. + */ + drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount; + + if (!isMidFrame) { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). */ + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* We started seeking mid-frame which means we need to skip the frame decoding part. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; + } + } else { + /* + It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this + frame never existed and leave the running sample count untouched. + */ + if (!isMidFrame) { + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + runningPCMFrameCount += pcmFrameCountInThisFLACFrame; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* + We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with + drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header. + */ + runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + isMidFrame = DRFLAC_FALSE; + } + + /* If we are seeking to the end of the file and we've just hit it, we're done. */ + if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) { + return DRFLAC_TRUE; + } + } + + next_iteration: + /* Grab the next frame in preparation for the next iteration. */ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } +} + + +#if !defined(DR_FLAC_NO_CRC) +/* +We use an average compression ratio to determine our approximate start location. FLAC files are generally about 50%-70% the size of their +uncompressed counterparts so we'll use this as a basis. I'm going to split the middle and use a factor of 0.6 to determine the starting +location. +*/ +#define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f + +static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFlac, drflac_uint64 targetByte, drflac_uint64 rangeLo, drflac_uint64 rangeHi, drflac_uint64* pLastSuccessfulSeekOffset) +{ + DRFLAC_ASSERT(pFlac != NULL); + DRFLAC_ASSERT(pLastSuccessfulSeekOffset != NULL); + DRFLAC_ASSERT(targetByte >= rangeLo); + DRFLAC_ASSERT(targetByte <= rangeHi); + + *pLastSuccessfulSeekOffset = pFlac->firstFLACFramePosInBytes; + + for (;;) { + /* After rangeLo == rangeHi == targetByte fails, we need to break out. */ + drflac_uint64 lastTargetByte = targetByte; + + /* When seeking to a byte, failure probably means we've attempted to seek beyond the end of the stream. To counter this we just halve it each attempt. */ + if (!drflac__seek_to_byte(&pFlac->bs, targetByte)) { + /* If we couldn't even seek to the first byte in the stream we have a problem. Just abandon the whole thing. */ + if (targetByte == 0) { + drflac__seek_to_first_frame(pFlac); /* Try to recover. */ + return DRFLAC_FALSE; + } + + /* Halve the byte location and continue. */ + targetByte = rangeLo + ((rangeHi - rangeLo)/2); + rangeHi = targetByte; + } else { + /* Getting here should mean that we have seeked to an appropriate byte. */ + + /* Clear the details of the FLAC frame so we don't misreport data. */ + DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame)); + + /* + Now seek to the next FLAC frame. We need to decode the entire frame (not just the header) because it's possible for the header to incorrectly pass the + CRC check and return bad data. We need to decode the entire frame to be more certain. Although this seems unlikely, this has happened to me in testing + so it needs to stay this way for now. + */ +#if 1 + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + /* Halve the byte location and continue. */ + targetByte = rangeLo + ((rangeHi - rangeLo)/2); + rangeHi = targetByte; + } else { + break; + } +#else + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + /* Halve the byte location and continue. */ + targetByte = rangeLo + ((rangeHi - rangeLo)/2); + rangeHi = targetByte; + } else { + break; + } +#endif + } + + /* We already tried this byte and there are no more to try, break out. */ + if(targetByte == lastTargetByte) { + return DRFLAC_FALSE; + } + } + + /* The current PCM frame needs to be updated based on the frame we just seeked to. */ + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL); + + DRFLAC_ASSERT(targetByte <= rangeHi); + + *pLastSuccessfulSeekOffset = targetByte; + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 offset) +{ + return drflac__seek_forward_by_pcm_frames(pFlac, offset) == offset; +} + + +static drflac_bool32 drflac__seek_to_pcm_frame__binary_search_internal(drflac* pFlac, drflac_uint64 pcmFrameIndex, drflac_uint64 byteRangeLo, drflac_uint64 byteRangeHi) +{ + /* This assumes pFlac->currentPCMFrame is sitting on byteRangeLo upon entry. */ + + drflac_uint64 targetByte; + drflac_uint64 pcmRangeLo = pFlac->totalPCMFrameCount; + drflac_uint64 pcmRangeHi = 0; + drflac_uint64 lastSuccessfulSeekOffset = (drflac_uint64)-1; + drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo; + drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096; + + targetByte = byteRangeLo + (drflac_uint64)(((drflac_int64)((pcmFrameIndex - pFlac->currentPCMFrame) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO); + if (targetByte > byteRangeHi) { + targetByte = byteRangeHi; + } + + for (;;) { + if (drflac__seek_to_approximate_flac_frame_to_byte(pFlac, targetByte, byteRangeLo, byteRangeHi, &lastSuccessfulSeekOffset)) { + /* We found a FLAC frame. We need to check if it contains the sample we're looking for. */ + drflac_uint64 newPCMRangeLo; + drflac_uint64 newPCMRangeHi; + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &newPCMRangeLo, &newPCMRangeHi); + + /* If we selected the same frame, it means we should be pretty close. Just decode the rest. */ + if (pcmRangeLo == newPCMRangeLo) { + if (!drflac__seek_to_approximate_flac_frame_to_byte(pFlac, closestSeekOffsetBeforeTargetPCMFrame, closestSeekOffsetBeforeTargetPCMFrame, byteRangeHi, &lastSuccessfulSeekOffset)) { + break; /* Failed to seek to closest frame. */ + } + + if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) { + return DRFLAC_TRUE; + } else { + break; /* Failed to seek forward. */ + } + } + + pcmRangeLo = newPCMRangeLo; + pcmRangeHi = newPCMRangeHi; + + if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) { + /* The target PCM frame is in this FLAC frame. */ + if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame) ) { + return DRFLAC_TRUE; + } else { + break; /* Failed to seek to FLAC frame. */ + } + } else { + const float approxCompressionRatio = (drflac_int64)(lastSuccessfulSeekOffset - pFlac->firstFLACFramePosInBytes) / ((drflac_int64)(pcmRangeLo * pFlac->channels * pFlac->bitsPerSample)/8.0f); + + if (pcmRangeLo > pcmFrameIndex) { + /* We seeked too far forward. We need to move our target byte backward and try again. */ + byteRangeHi = lastSuccessfulSeekOffset; + if (byteRangeLo > byteRangeHi) { + byteRangeLo = byteRangeHi; + } + + targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2); + if (targetByte < byteRangeLo) { + targetByte = byteRangeLo; + } + } else /*if (pcmRangeHi < pcmFrameIndex)*/ { + /* We didn't seek far enough. We need to move our target byte forward and try again. */ + + /* If we're close enough we can just seek forward. */ + if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) { + if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) { + return DRFLAC_TRUE; + } else { + break; /* Failed to seek to FLAC frame. */ + } + } else { + byteRangeLo = lastSuccessfulSeekOffset; + if (byteRangeHi < byteRangeLo) { + byteRangeHi = byteRangeLo; + } + + targetByte = lastSuccessfulSeekOffset + (drflac_uint64)(((drflac_int64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * approxCompressionRatio); + if (targetByte > byteRangeHi) { + targetByte = byteRangeHi; + } + + if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) { + closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset; + } + } + } + } + } else { + /* Getting here is really bad. We just recover as best we can, but moving to the first frame in the stream, and then abort. */ + break; + } + } + + drflac__seek_to_first_frame(pFlac); /* <-- Try to recover. */ + return DRFLAC_FALSE; +} + +static drflac_bool32 drflac__seek_to_pcm_frame__binary_search(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_uint64 byteRangeLo; + drflac_uint64 byteRangeHi; + drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096; + + /* Our algorithm currently assumes the FLAC stream is currently sitting at the start. */ + if (drflac__seek_to_first_frame(pFlac) == DRFLAC_FALSE) { + return DRFLAC_FALSE; + } + + /* If we're close enough to the start, just move to the start and seek forward. */ + if (pcmFrameIndex < seekForwardThreshold) { + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFrameIndex) == pcmFrameIndex; + } + + /* + Our starting byte range is the byte position of the first FLAC frame and the approximate end of the file as if it were completely uncompressed. This ensures + the entire file is included, even though most of the time it'll exceed the end of the actual stream. This is OK as the frame searching logic will handle it. + */ + byteRangeLo = pFlac->firstFLACFramePosInBytes; + byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f); + + return drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi); +} +#endif /* !DR_FLAC_NO_CRC */ + +static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_uint32 iClosestSeekpoint = 0; + drflac_bool32 isMidFrame = DRFLAC_FALSE; + drflac_uint64 runningPCMFrameCount; + drflac_uint32 iSeekpoint; + + + DRFLAC_ASSERT(pFlac != NULL); + + if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) { + return DRFLAC_FALSE; + } + + for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { + if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) { + break; + } + + iClosestSeekpoint = iSeekpoint; + } + + /* There's been cases where the seek table contains only zeros. We need to do some basic validation on the closest seekpoint. */ + if (pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount == 0 || pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount > pFlac->maxBlockSizeInPCMFrames) { + return DRFLAC_FALSE; + } + if (pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame > pFlac->totalPCMFrameCount && pFlac->totalPCMFrameCount > 0) { + return DRFLAC_FALSE; + } + +#if !defined(DR_FLAC_NO_CRC) + /* At this point we should know the closest seek point. We can use a binary search for this. We need to know the total sample count for this. */ + if (pFlac->totalPCMFrameCount > 0) { + drflac_uint64 byteRangeLo; + drflac_uint64 byteRangeHi; + + byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f); + byteRangeLo = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset; + + /* + If our closest seek point is not the last one, we only need to search between it and the next one. The section below calculates an appropriate starting + value for byteRangeHi which will clamp it appropriately. + + Note that the next seekpoint must have an offset greater than the closest seekpoint because otherwise our binary search algorithm will break down. There + have been cases where a seektable consists of seek points where every byte offset is set to 0 which causes problems. If this happens we need to abort. + */ + if (iClosestSeekpoint < pFlac->seekpointCount-1) { + drflac_uint32 iNextSeekpoint = iClosestSeekpoint + 1; + + /* Basic validation on the seekpoints to ensure they're usable. */ + if (pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset >= pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset || pFlac->pSeekpoints[iNextSeekpoint].pcmFrameCount == 0) { + return DRFLAC_FALSE; /* The next seekpoint doesn't look right. The seek table cannot be trusted from here. Abort. */ + } + + if (pFlac->pSeekpoints[iNextSeekpoint].firstPCMFrame != (((drflac_uint64)0xFFFFFFFF << 32) | 0xFFFFFFFF)) { /* Make sure it's not a placeholder seekpoint. */ + byteRangeHi = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset - 1; /* byteRangeHi must be zero based. */ + } + } + + if (drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) { + if (drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL); + + if (drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi)) { + return DRFLAC_TRUE; + } + } + } + } +#endif /* !DR_FLAC_NO_CRC */ + + /* Getting here means we need to use a slower algorithm because the binary search method failed or cannot be used. */ + + /* + If we are seeking forward and the closest seekpoint is _before_ the current sample, we just seek forward from where we are. Otherwise we start seeking + from the seekpoint's first sample. + */ + if (pcmFrameIndex >= pFlac->currentPCMFrame && pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame <= pFlac->currentPCMFrame) { + /* Optimized case. Just seek forward from where we are. */ + runningPCMFrameCount = pFlac->currentPCMFrame; + + /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */ + if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } else { + isMidFrame = DRFLAC_TRUE; + } + } else { + /* Slower case. Seek to the start of the seekpoint and then seek forward from there. */ + runningPCMFrameCount = pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame; + + if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) { + return DRFLAC_FALSE; + } + + /* Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. */ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } + + for (;;) { + drflac_uint64 pcmFrameCountInThisFLACFrame; + drflac_uint64 firstPCMFrameInFLACFrame = 0; + drflac_uint64 lastPCMFrameInFLACFrame = 0; + + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); + + pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; + if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) { + /* + The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend + it never existed and keep iterating. + */ + drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount; + + if (!isMidFrame) { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). */ + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* We started seeking mid-frame which means we need to skip the frame decoding part. */ + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; + } + } else { + /* + It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this + frame never existed and leave the running sample count untouched. + */ + if (!isMidFrame) { + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + runningPCMFrameCount += pcmFrameCountInThisFLACFrame; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + goto next_iteration; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* + We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with + drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header. + */ + runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + isMidFrame = DRFLAC_FALSE; + } + + /* If we are seeking to the end of the file and we've just hit it, we're done. */ + if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) { + return DRFLAC_TRUE; + } + } + + next_iteration: + /* Grab the next frame in preparation for the next iteration. */ + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + } +} + + +#ifndef DR_FLAC_NO_OGG +typedef struct +{ + drflac_uint8 capturePattern[4]; /* Should be "OggS" */ + drflac_uint8 structureVersion; /* Always 0. */ + drflac_uint8 headerType; + drflac_uint64 granulePosition; + drflac_uint32 serialNumber; + drflac_uint32 sequenceNumber; + drflac_uint32 checksum; + drflac_uint8 segmentCount; + drflac_uint8 segmentTable[255]; +} drflac_ogg_page_header; +#endif + +typedef struct +{ + drflac_read_proc onRead; + drflac_seek_proc onSeek; + drflac_meta_proc onMeta; + drflac_container container; + void* pUserData; + void* pUserDataMD; + drflac_uint32 sampleRate; + drflac_uint8 channels; + drflac_uint8 bitsPerSample; + drflac_uint64 totalPCMFrameCount; + drflac_uint16 maxBlockSizeInPCMFrames; + drflac_uint64 runningFilePos; + drflac_bool32 hasStreamInfoBlock; + drflac_bool32 hasMetadataBlocks; + drflac_bs bs; /* <-- A bit streamer is required for loading data during initialization. */ + drflac_frame_header firstFrameHeader; /* <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block. */ + +#ifndef DR_FLAC_NO_OGG + drflac_uint32 oggSerial; + drflac_uint64 oggFirstBytePos; + drflac_ogg_page_header oggBosHeader; +#endif +} drflac_init_info; + +static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize) +{ + blockHeader = drflac__be2host_32(blockHeader); + *isLastBlock = (drflac_uint8)((blockHeader & 0x80000000UL) >> 31); + *blockType = (drflac_uint8)((blockHeader & 0x7F000000UL) >> 24); + *blockSize = (blockHeader & 0x00FFFFFFUL); +} + +static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize) +{ + drflac_uint32 blockHeader; + + *blockSize = 0; + if (onRead(pUserData, &blockHeader, 4) != 4) { + return DRFLAC_FALSE; + } + + drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize); + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo) +{ + drflac_uint32 blockSizes; + drflac_uint64 frameSizes = 0; + drflac_uint64 importantProps; + drflac_uint8 md5[16]; + + /* min/max block size. */ + if (onRead(pUserData, &blockSizes, 4) != 4) { + return DRFLAC_FALSE; + } + + /* min/max frame size. */ + if (onRead(pUserData, &frameSizes, 6) != 6) { + return DRFLAC_FALSE; + } + + /* Sample rate, channels, bits per sample and total sample count. */ + if (onRead(pUserData, &importantProps, 8) != 8) { + return DRFLAC_FALSE; + } + + /* MD5 */ + if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) { + return DRFLAC_FALSE; + } + + blockSizes = drflac__be2host_32(blockSizes); + frameSizes = drflac__be2host_64(frameSizes); + importantProps = drflac__be2host_64(importantProps); + + pStreamInfo->minBlockSizeInPCMFrames = (drflac_uint16)((blockSizes & 0xFFFF0000) >> 16); + pStreamInfo->maxBlockSizeInPCMFrames = (drflac_uint16) (blockSizes & 0x0000FFFF); + pStreamInfo->minFrameSizeInPCMFrames = (drflac_uint32)((frameSizes & (((drflac_uint64)0x00FFFFFF << 16) << 24)) >> 40); + pStreamInfo->maxFrameSizeInPCMFrames = (drflac_uint32)((frameSizes & (((drflac_uint64)0x00FFFFFF << 16) << 0)) >> 16); + pStreamInfo->sampleRate = (drflac_uint32)((importantProps & (((drflac_uint64)0x000FFFFF << 16) << 28)) >> 44); + pStreamInfo->channels = (drflac_uint8 )((importantProps & (((drflac_uint64)0x0000000E << 16) << 24)) >> 41) + 1; + pStreamInfo->bitsPerSample = (drflac_uint8 )((importantProps & (((drflac_uint64)0x0000001F << 16) << 20)) >> 36) + 1; + pStreamInfo->totalPCMFrameCount = ((importantProps & ((((drflac_uint64)0x0000000F << 16) << 16) | 0xFFFFFFFF))); + DRFLAC_COPY_MEMORY(pStreamInfo->md5, md5, sizeof(md5)); + + return DRFLAC_TRUE; +} + + +static void* drflac__malloc_default(size_t sz, void* pUserData) +{ + (void)pUserData; + return DRFLAC_MALLOC(sz); +} + +static void* drflac__realloc_default(void* p, size_t sz, void* pUserData) +{ + (void)pUserData; + return DRFLAC_REALLOC(p, sz); +} + +static void drflac__free_default(void* p, void* pUserData) +{ + (void)pUserData; + DRFLAC_FREE(p); +} + + +static void* drflac__malloc_from_callbacks(size_t sz, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + + if (pAllocationCallbacks->onMalloc != NULL) { + return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData); + } + + /* Try using realloc(). */ + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData); + } + + return NULL; +} + +static void* drflac__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks == NULL) { + return NULL; + } + + if (pAllocationCallbacks->onRealloc != NULL) { + return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData); + } + + /* Try emulating realloc() in terms of malloc()/free(). */ + if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) { + void* p2; + + p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData); + if (p2 == NULL) { + return NULL; + } + + if (p != NULL) { + DRFLAC_COPY_MEMORY(p2, p, szOld); + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + } + + return p2; + } + + return NULL; +} + +static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (p == NULL || pAllocationCallbacks == NULL) { + return; + } + + if (pAllocationCallbacks->onFree != NULL) { + pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData); + } +} + + +static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize, drflac_allocation_callbacks* pAllocationCallbacks) +{ + /* + We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that + we'll be sitting on byte 42. + */ + drflac_uint64 runningFilePos = 42; + drflac_uint64 seektablePos = 0; + drflac_uint32 seektableSize = 0; + + for (;;) { + drflac_metadata metadata; + drflac_uint8 isLastBlock = 0; + drflac_uint8 blockType; + drflac_uint32 blockSize; + if (drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize) == DRFLAC_FALSE) { + return DRFLAC_FALSE; + } + runningFilePos += 4; + + metadata.type = blockType; + metadata.pRawData = NULL; + metadata.rawDataSize = 0; + + switch (blockType) + { + case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION: + { + if (blockSize < 4) { + return DRFLAC_FALSE; + } + + if (onMeta) { + void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + metadata.data.application.id = drflac__be2host_32(*(drflac_uint32*)pRawData); + metadata.data.application.pData = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32)); + metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32); + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE: + { + seektablePos = runningFilePos; + seektableSize = blockSize; + + if (onMeta) { + drflac_uint32 iSeekpoint; + void* pRawData; + + pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint); + metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData; + + /* Endian swap. */ + for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) { + drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint; + pSeekpoint->firstPCMFrame = drflac__be2host_64(pSeekpoint->firstPCMFrame); + pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset); + pSeekpoint->pcmFrameCount = drflac__be2host_16(pSeekpoint->pcmFrameCount); + } + + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT: + { + if (blockSize < 8) { + return DRFLAC_FALSE; + } + + if (onMeta) { + void* pRawData; + const char* pRunningData; + const char* pRunningDataEnd; + drflac_uint32 i; + + pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + + pRunningData = (const char*)pRawData; + pRunningDataEnd = (const char*)pRawData + blockSize; + + metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + + /* Need space for the rest of the block */ + if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + metadata.data.vorbis_comment.vendor = pRunningData; pRunningData += metadata.data.vorbis_comment.vendorLength; + metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + + /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */ + if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + metadata.data.vorbis_comment.pComments = pRunningData; + + /* Check that the comments section is valid before passing it to the callback */ + for (i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) { + drflac_uint32 commentLength; + + if (pRunningDataEnd - pRunningData < 4) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + pRunningData += commentLength; + } + + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET: + { + if (blockSize < 396) { + return DRFLAC_FALSE; + } + + if (onMeta) { + void* pRawData; + const char* pRunningData; + const char* pRunningDataEnd; + drflac_uint8 iTrack; + drflac_uint8 iIndex; + + pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + + pRunningData = (const char*)pRawData; + pRunningDataEnd = (const char*)pRawData + blockSize; + + DRFLAC_COPY_MEMORY(metadata.data.cuesheet.catalog, pRunningData, 128); pRunningData += 128; + metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8; + metadata.data.cuesheet.isCD = (pRunningData[0] & 0x80) != 0; pRunningData += 259; + metadata.data.cuesheet.trackCount = pRunningData[0]; pRunningData += 1; + metadata.data.cuesheet.pTrackData = pRunningData; + + /* Check that the cuesheet tracks are valid before passing it to the callback */ + for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) { + drflac_uint8 indexCount; + drflac_uint32 indexPointSize; + + if (pRunningDataEnd - pRunningData < 36) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + /* Skip to the index point count */ + pRunningData += 35; + indexCount = pRunningData[0]; pRunningData += 1; + indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index); + if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + /* Endian swap. */ + for (iIndex = 0; iIndex < indexCount; ++iIndex) { + drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData; + pRunningData += sizeof(drflac_cuesheet_track_index); + pTrack->offset = drflac__be2host_64(pTrack->offset); + } + } + + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_PICTURE: + { + if (blockSize < 32) { + return DRFLAC_FALSE; + } + + if (onMeta) { + void* pRawData; + const char* pRunningData; + const char* pRunningDataEnd; + + pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + + pRunningData = (const char*)pRawData; + pRunningDataEnd = (const char*)pRawData + blockSize; + + metadata.data.picture.type = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + + /* Need space for the rest of the block */ + if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + metadata.data.picture.mime = pRunningData; pRunningData += metadata.data.picture.mimeLength; + metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + + /* Need space for the rest of the block */ + if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + metadata.data.picture.description = pRunningData; pRunningData += metadata.data.picture.descriptionLength; + metadata.data.picture.width = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.height = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.colorDepth = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + metadata.data.picture.pPictureData = (const drflac_uint8*)pRunningData; + + /* Need space for the picture after the block */ + if (pRunningDataEnd - pRunningData < (drflac_int64)metadata.data.picture.pictureDataSize) { /* <-- Note the order of operations to avoid overflow to a valid value */ + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_PADDING: + { + if (onMeta) { + metadata.data.padding.unused = 0; + + /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */ + if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { + isLastBlock = DRFLAC_TRUE; /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */ + } else { + onMeta(pUserDataMD, &metadata); + } + } + } break; + + case DRFLAC_METADATA_BLOCK_TYPE_INVALID: + { + /* Invalid chunk. Just skip over this one. */ + if (onMeta) { + if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { + isLastBlock = DRFLAC_TRUE; /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */ + } + } + } break; + + default: + { + /* + It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we + can at the very least report the chunk to the application and let it look at the raw data. + */ + if (onMeta) { + void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks); + if (pRawData == NULL) { + return DRFLAC_FALSE; + } + + if (onRead(pUserData, pRawData, blockSize) != blockSize) { + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + return DRFLAC_FALSE; + } + + metadata.pRawData = pRawData; + metadata.rawDataSize = blockSize; + onMeta(pUserDataMD, &metadata); + + drflac__free_from_callbacks(pRawData, pAllocationCallbacks); + } + } break; + } + + /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. */ + if (onMeta == NULL && blockSize > 0) { + if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) { + isLastBlock = DRFLAC_TRUE; + } + } + + runningFilePos += blockSize; + if (isLastBlock) { + break; + } + } + + *pSeektablePos = seektablePos; + *pSeektableSize = seektableSize; + *pFirstFramePos = runningFilePos; + + return DRFLAC_TRUE; +} + +static drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed) +{ + /* Pre Condition: The bit stream should be sitting just past the 4-byte id header. */ + + drflac_uint8 isLastBlock; + drflac_uint8 blockType; + drflac_uint32 blockSize; + + (void)onSeek; + + pInit->container = drflac_container_native; + + /* The first metadata block should be the STREAMINFO block. */ + if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { + return DRFLAC_FALSE; + } + + if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) { + if (!relaxed) { + /* We're opening in strict mode and the first block is not the STREAMINFO block. Error. */ + return DRFLAC_FALSE; + } else { + /* + Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined + for that frame. + */ + pInit->hasStreamInfoBlock = DRFLAC_FALSE; + pInit->hasMetadataBlocks = DRFLAC_FALSE; + + if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) { + return DRFLAC_FALSE; /* Couldn't find a frame. */ + } + + if (pInit->firstFrameHeader.bitsPerSample == 0) { + return DRFLAC_FALSE; /* Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. */ + } + + pInit->sampleRate = pInit->firstFrameHeader.sampleRate; + pInit->channels = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment); + pInit->bitsPerSample = pInit->firstFrameHeader.bitsPerSample; + pInit->maxBlockSizeInPCMFrames = 65535; /* <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo */ + return DRFLAC_TRUE; + } + } else { + drflac_streaminfo streaminfo; + if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) { + return DRFLAC_FALSE; + } + + pInit->hasStreamInfoBlock = DRFLAC_TRUE; + pInit->sampleRate = streaminfo.sampleRate; + pInit->channels = streaminfo.channels; + pInit->bitsPerSample = streaminfo.bitsPerSample; + pInit->totalPCMFrameCount = streaminfo.totalPCMFrameCount; + pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames; /* Don't care about the min block size - only the max (used for determining the size of the memory allocation). */ + pInit->hasMetadataBlocks = !isLastBlock; + + if (onMeta) { + drflac_metadata metadata; + metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO; + metadata.pRawData = NULL; + metadata.rawDataSize = 0; + metadata.data.streaminfo = streaminfo; + onMeta(pUserDataMD, &metadata); + } + + return DRFLAC_TRUE; + } +} + +#ifndef DR_FLAC_NO_OGG +#define DRFLAC_OGG_MAX_PAGE_SIZE 65307 +#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32 1605413199 /* CRC-32 of "OggS". */ + +typedef enum +{ + drflac_ogg_recover_on_crc_mismatch, + drflac_ogg_fail_on_crc_mismatch +} drflac_ogg_crc_mismatch_recovery; + +#ifndef DR_FLAC_NO_CRC +static drflac_uint32 drflac__crc32_table[] = { + 0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L, + 0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L, + 0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L, + 0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL, + 0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L, + 0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L, + 0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L, + 0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL, + 0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L, + 0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L, + 0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L, + 0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL, + 0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L, + 0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L, + 0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L, + 0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL, + 0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL, + 0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L, + 0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L, + 0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL, + 0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL, + 0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L, + 0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L, + 0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL, + 0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL, + 0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L, + 0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L, + 0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL, + 0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL, + 0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L, + 0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L, + 0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL, + 0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L, + 0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL, + 0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL, + 0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L, + 0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L, + 0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL, + 0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL, + 0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L, + 0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L, + 0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL, + 0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL, + 0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L, + 0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L, + 0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL, + 0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL, + 0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L, + 0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L, + 0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL, + 0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L, + 0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L, + 0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L, + 0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL, + 0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L, + 0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L, + 0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L, + 0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL, + 0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L, + 0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L, + 0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L, + 0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL, + 0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L, + 0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L +}; +#endif + +static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data) +{ +#ifndef DR_FLAC_NO_CRC + return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data]; +#else + (void)data; + return crc32; +#endif +} + +static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize) +{ + /* This can be optimized. */ + drflac_uint32 i; + for (i = 0; i < dataSize; ++i) { + crc32 = drflac_crc32_byte(crc32, pData[i]); + } + return crc32; +} + + +static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4]) +{ + return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S'; +} + +static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader) +{ + return 27 + pHeader->segmentCount; +} + +static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader) +{ + drflac_uint32 pageBodySize = 0; + int i; + + for (i = 0; i < pHeader->segmentCount; ++i) { + pageBodySize += pHeader->segmentTable[i]; + } + + return pageBodySize; +} + +static drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32) +{ + drflac_uint8 data[23]; + drflac_uint32 i; + + DRFLAC_ASSERT(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32); + + if (onRead(pUserData, data, 23) != 23) { + return DRFLAC_AT_END; + } + *pBytesRead += 23; + + /* + It's not actually used, but set the capture pattern to 'OggS' for completeness. Not doing this will cause static analysers to complain about + us trying to access uninitialized data. We could alternatively just comment out this member of the drflac_ogg_page_header structure, but I + like to have it map to the structure of the underlying data. + */ + pHeader->capturePattern[0] = 'O'; + pHeader->capturePattern[1] = 'g'; + pHeader->capturePattern[2] = 'g'; + pHeader->capturePattern[3] = 'S'; + + pHeader->structureVersion = data[0]; + pHeader->headerType = data[1]; + DRFLAC_COPY_MEMORY(&pHeader->granulePosition, &data[ 2], 8); + DRFLAC_COPY_MEMORY(&pHeader->serialNumber, &data[10], 4); + DRFLAC_COPY_MEMORY(&pHeader->sequenceNumber, &data[14], 4); + DRFLAC_COPY_MEMORY(&pHeader->checksum, &data[18], 4); + pHeader->segmentCount = data[22]; + + /* Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. */ + data[18] = 0; + data[19] = 0; + data[20] = 0; + data[21] = 0; + + for (i = 0; i < 23; ++i) { + *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]); + } + + + if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) { + return DRFLAC_AT_END; + } + *pBytesRead += pHeader->segmentCount; + + for (i = 0; i < pHeader->segmentCount; ++i) { + *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]); + } + + return DRFLAC_SUCCESS; +} + +static drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32) +{ + drflac_uint8 id[4]; + + *pBytesRead = 0; + + if (onRead(pUserData, id, 4) != 4) { + return DRFLAC_AT_END; + } + *pBytesRead += 4; + + /* We need to read byte-by-byte until we find the OggS capture pattern. */ + for (;;) { + if (drflac_ogg__is_capture_pattern(id)) { + drflac_result result; + + *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32; + + result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32); + if (result == DRFLAC_SUCCESS) { + return DRFLAC_SUCCESS; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + continue; + } else { + return result; + } + } + } else { + /* The first 4 bytes did not equal the capture pattern. Read the next byte and try again. */ + id[0] = id[1]; + id[1] = id[2]; + id[2] = id[3]; + if (onRead(pUserData, &id[3], 1) != 1) { + return DRFLAC_AT_END; + } + *pBytesRead += 1; + } + } +} + + +/* +The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works +in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed +in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type +dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from +the physical Ogg bitstream are converted and delivered in native FLAC format. +*/ +typedef struct +{ + drflac_read_proc onRead; /* The original onRead callback from drflac_open() and family. */ + drflac_seek_proc onSeek; /* The original onSeek callback from drflac_open() and family. */ + void* pUserData; /* The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. */ + drflac_uint64 currentBytePos; /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */ + drflac_uint64 firstBytePos; /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */ + drflac_uint32 serialNumber; /* The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. */ + drflac_ogg_page_header bosPageHeader; /* Used for seeking. */ + drflac_ogg_page_header currentPageHeader; + drflac_uint32 bytesRemainingInPage; + drflac_uint32 pageDataSize; + drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE]; +} drflac_oggbs; /* oggbs = Ogg Bitstream */ + +static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead) +{ + size_t bytesActuallyRead = oggbs->onRead(oggbs->pUserData, bufferOut, bytesToRead); + oggbs->currentBytePos += bytesActuallyRead; + + return bytesActuallyRead; +} + +static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin) +{ + if (origin == drflac_seek_origin_start) { + if (offset <= 0x7FFFFFFF) { + if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos = offset; + + return DRFLAC_TRUE; + } else { + if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos = offset; + + return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current); + } + } else { + while (offset > 0x7FFFFFFF) { + if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos += 0x7FFFFFFF; + offset -= 0x7FFFFFFF; + } + + if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) { /* <-- Safe cast thanks to the loop above. */ + return DRFLAC_FALSE; + } + oggbs->currentBytePos += offset; + + return DRFLAC_TRUE; + } +} + +static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod) +{ + drflac_ogg_page_header header; + for (;;) { + drflac_uint32 crc32 = 0; + drflac_uint32 bytesRead; + drflac_uint32 pageBodySize; +#ifndef DR_FLAC_NO_CRC + drflac_uint32 actualCRC32; +#endif + + if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { + return DRFLAC_FALSE; + } + oggbs->currentBytePos += bytesRead; + + pageBodySize = drflac_ogg__get_page_body_size(&header); + if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) { + continue; /* Invalid page size. Assume it's corrupted and just move to the next page. */ + } + + if (header.serialNumber != oggbs->serialNumber) { + /* It's not a FLAC page. Skip it. */ + if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + continue; + } + + + /* We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. */ + if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) { + return DRFLAC_FALSE; + } + oggbs->pageDataSize = pageBodySize; + +#ifndef DR_FLAC_NO_CRC + actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize); + if (actualCRC32 != header.checksum) { + if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) { + continue; /* CRC mismatch. Skip this page. */ + } else { + /* + Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we + go to the next valid page to ensure we're in a good state, but return false to let the caller know that the + seek did not fully complete. + */ + drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch); + return DRFLAC_FALSE; + } + } +#else + (void)recoveryMethod; /* <-- Silence a warning. */ +#endif + + oggbs->currentPageHeader = header; + oggbs->bytesRemainingInPage = pageBodySize; + return DRFLAC_TRUE; + } +} + +static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead) +{ + drflac_oggbs* oggbs = (drflac_oggbs*)pUserData; + drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut; + size_t bytesRead = 0; + + DRFLAC_ASSERT(oggbs != NULL); + DRFLAC_ASSERT(pRunningBufferOut != NULL); + + /* Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one. */ + while (bytesRead < bytesToRead) { + size_t bytesRemainingToRead = bytesToRead - bytesRead; + + if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) { + DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead); + bytesRead += bytesRemainingToRead; + oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead; + break; + } + + /* If we get here it means some of the requested data is contained in the next pages. */ + if (oggbs->bytesRemainingInPage > 0) { + DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage); + bytesRead += oggbs->bytesRemainingInPage; + pRunningBufferOut += oggbs->bytesRemainingInPage; + oggbs->bytesRemainingInPage = 0; + } + + DRFLAC_ASSERT(bytesRemainingToRead > 0); + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { + break; /* Failed to go to the next page. Might have simply hit the end of the stream. */ + } + } + + return bytesRead; +} + +static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin) +{ + drflac_oggbs* oggbs = (drflac_oggbs*)pUserData; + int bytesSeeked = 0; + + DRFLAC_ASSERT(oggbs != NULL); + DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */ + + /* Seeking is always forward which makes things a lot simpler. */ + if (origin == drflac_seek_origin_start) { + if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) { + return DRFLAC_FALSE; + } + + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) { + return DRFLAC_FALSE; + } + + return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current); + } + + DRFLAC_ASSERT(origin == drflac_seek_origin_current); + + while (bytesSeeked < offset) { + int bytesRemainingToSeek = offset - bytesSeeked; + DRFLAC_ASSERT(bytesRemainingToSeek >= 0); + + if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) { + bytesSeeked += bytesRemainingToSeek; + (void)bytesSeeked; /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */ + oggbs->bytesRemainingInPage -= bytesRemainingToSeek; + break; + } + + /* If we get here it means some of the requested data is contained in the next pages. */ + if (oggbs->bytesRemainingInPage > 0) { + bytesSeeked += (int)oggbs->bytesRemainingInPage; + oggbs->bytesRemainingInPage = 0; + } + + DRFLAC_ASSERT(bytesRemainingToSeek > 0); + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) { + /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */ + return DRFLAC_FALSE; + } + } + + return DRFLAC_TRUE; +} + + +static drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + drflac_uint64 originalBytePos; + drflac_uint64 runningGranulePosition; + drflac_uint64 runningFrameBytePos; + drflac_uint64 runningPCMFrameCount; + + DRFLAC_ASSERT(oggbs != NULL); + + originalBytePos = oggbs->currentBytePos; /* For recovery. Points to the OggS identifier. */ + + /* First seek to the first frame. */ + if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes)) { + return DRFLAC_FALSE; + } + oggbs->bytesRemainingInPage = 0; + + runningGranulePosition = 0; + for (;;) { + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { + drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start); + return DRFLAC_FALSE; /* Never did find that sample... */ + } + + runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize; + if (oggbs->currentPageHeader.granulePosition >= pcmFrameIndex) { + break; /* The sample is somewhere in the previous page. */ + } + + /* + At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we + disregard any pages that do not begin a fresh packet. + */ + if ((oggbs->currentPageHeader.headerType & 0x01) == 0) { /* <-- Is it a fresh page? */ + if (oggbs->currentPageHeader.segmentTable[0] >= 2) { + drflac_uint8 firstBytesInPage[2]; + firstBytesInPage[0] = oggbs->pageData[0]; + firstBytesInPage[1] = oggbs->pageData[1]; + + if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) { /* <-- Does the page begin with a frame's sync code? */ + runningGranulePosition = oggbs->currentPageHeader.granulePosition; + } + + continue; + } + } + } + + /* + We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the + start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of + a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until + we find the one containing the target sample. + */ + if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) { + return DRFLAC_FALSE; + } + if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) { + return DRFLAC_FALSE; + } + + /* + At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep + looping over these frames until we find the one containing the sample we're after. + */ + runningPCMFrameCount = runningGranulePosition; + for (;;) { + /* + There are two ways to find the sample and seek past irrelevant frames: + 1) Use the native FLAC decoder. + 2) Use Ogg's framing system. + + Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to + do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code + duplication for the decoding of frame headers. + + Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg + bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the + standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks + the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read + using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), need to be re-implemented so as to + avoid the use of the drflac_bs object. + + Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons: + 1) Seeking is already partially accelerated using Ogg's paging system in the code block above. + 2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon. + 3) Simplicity. + */ + drflac_uint64 firstPCMFrameInFLACFrame = 0; + drflac_uint64 lastPCMFrameInFLACFrame = 0; + drflac_uint64 pcmFrameCountInThisFrame; + + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + return DRFLAC_FALSE; + } + + drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame); + + pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1; + + /* If we are seeking to the end of the file and we've just hit it, we're done. */ + if (pcmFrameIndex == pFlac->totalPCMFrameCount && (runningPCMFrameCount + pcmFrameCountInThisFrame) == pFlac->totalPCMFrameCount) { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + pFlac->currentPCMFrame = pcmFrameIndex; + pFlac->currentFLACFrame.pcmFramesRemaining = 0; + return DRFLAC_TRUE; + } else { + return DRFLAC_FALSE; + } + } + + if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) { + /* + The sample should be in this FLAC frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend + it never existed and keep iterating. + */ + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */ + drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount); /* <-- Safe cast because the maximum number of samples in a frame is 65535. */ + if (pcmFramesToDecode == 0) { + return DRFLAC_TRUE; + } + + pFlac->currentPCMFrame = runningPCMFrameCount; + + return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode; /* <-- If this fails, something bad has happened (it should never fail). */ + } else { + if (result == DRFLAC_CRC_MISMATCH) { + continue; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } else { + /* + It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this + frame never existed and leave the running sample count untouched. + */ + drflac_result result = drflac__seek_to_next_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + runningPCMFrameCount += pcmFrameCountInThisFrame; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + continue; /* CRC mismatch. Pretend this frame never existed. */ + } else { + return DRFLAC_FALSE; + } + } + } + } +} + + + +static drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed) +{ + drflac_ogg_page_header header; + drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32; + drflac_uint32 bytesRead = 0; + + /* Pre Condition: The bit stream should be sitting just past the 4-byte OggS capture pattern. */ + (void)relaxed; + + pInit->container = drflac_container_ogg; + pInit->oggFirstBytePos = 0; + + /* + We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the + stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if + any match the FLAC specification. Important to keep in mind that the stream may be multiplexed. + */ + if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { + return DRFLAC_FALSE; + } + pInit->runningFilePos += bytesRead; + + for (;;) { + int pageBodySize; + + /* Break if we're past the beginning of stream page. */ + if ((header.headerType & 0x02) == 0) { + return DRFLAC_FALSE; + } + + /* Check if it's a FLAC header. */ + pageBodySize = drflac_ogg__get_page_body_size(&header); + if (pageBodySize == 51) { /* 51 = the lacing value of the FLAC header packet. */ + /* It could be a FLAC page... */ + drflac_uint32 bytesRemainingInPage = pageBodySize; + drflac_uint8 packetType; + + if (onRead(pUserData, &packetType, 1) != 1) { + return DRFLAC_FALSE; + } + + bytesRemainingInPage -= 1; + if (packetType == 0x7F) { + /* Increasingly more likely to be a FLAC page... */ + drflac_uint8 sig[4]; + if (onRead(pUserData, sig, 4) != 4) { + return DRFLAC_FALSE; + } + + bytesRemainingInPage -= 4; + if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') { + /* Almost certainly a FLAC page... */ + drflac_uint8 mappingVersion[2]; + if (onRead(pUserData, mappingVersion, 2) != 2) { + return DRFLAC_FALSE; + } + + if (mappingVersion[0] != 1) { + return DRFLAC_FALSE; /* Only supporting version 1.x of the Ogg mapping. */ + } + + /* + The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to + be handling it in a generic way based on the serial number and packet types. + */ + if (!onSeek(pUserData, 2, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + + /* Expecting the native FLAC signature "fLaC". */ + if (onRead(pUserData, sig, 4) != 4) { + return DRFLAC_FALSE; + } + + if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') { + /* The remaining data in the page should be the STREAMINFO block. */ + drflac_streaminfo streaminfo; + drflac_uint8 isLastBlock; + drflac_uint8 blockType; + drflac_uint32 blockSize; + if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) { + return DRFLAC_FALSE; + } + + if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) { + return DRFLAC_FALSE; /* Invalid block type. First block must be the STREAMINFO block. */ + } + + if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) { + /* Success! */ + pInit->hasStreamInfoBlock = DRFLAC_TRUE; + pInit->sampleRate = streaminfo.sampleRate; + pInit->channels = streaminfo.channels; + pInit->bitsPerSample = streaminfo.bitsPerSample; + pInit->totalPCMFrameCount = streaminfo.totalPCMFrameCount; + pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames; + pInit->hasMetadataBlocks = !isLastBlock; + + if (onMeta) { + drflac_metadata metadata; + metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO; + metadata.pRawData = NULL; + metadata.rawDataSize = 0; + metadata.data.streaminfo = streaminfo; + onMeta(pUserDataMD, &metadata); + } + + pInit->runningFilePos += pageBodySize; + pInit->oggFirstBytePos = pInit->runningFilePos - 79; /* Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page. */ + pInit->oggSerial = header.serialNumber; + pInit->oggBosHeader = header; + break; + } else { + /* Failed to read STREAMINFO block. Aww, so close... */ + return DRFLAC_FALSE; + } + } else { + /* Invalid file. */ + return DRFLAC_FALSE; + } + } else { + /* Not a FLAC header. Skip it. */ + if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + } + } else { + /* Not a FLAC header. Seek past the entire page and move on to the next. */ + if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + } + } else { + if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) { + return DRFLAC_FALSE; + } + } + + pInit->runningFilePos += pageBodySize; + + + /* Read the header of the next page. */ + if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) { + return DRFLAC_FALSE; + } + pInit->runningFilePos += bytesRead; + } + + /* + If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next + packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the + Ogg bistream object. + */ + pInit->hasMetadataBlocks = DRFLAC_TRUE; /* <-- Always have at least VORBIS_COMMENT metadata block. */ + return DRFLAC_TRUE; +} +#endif + +static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD) +{ + drflac_bool32 relaxed; + drflac_uint8 id[4]; + + if (pInit == NULL || onRead == NULL || onSeek == NULL) { + return DRFLAC_FALSE; + } + + DRFLAC_ZERO_MEMORY(pInit, sizeof(*pInit)); + pInit->onRead = onRead; + pInit->onSeek = onSeek; + pInit->onMeta = onMeta; + pInit->container = container; + pInit->pUserData = pUserData; + pInit->pUserDataMD = pUserDataMD; + + pInit->bs.onRead = onRead; + pInit->bs.onSeek = onSeek; + pInit->bs.pUserData = pUserData; + drflac__reset_cache(&pInit->bs); + + + /* If the container is explicitly defined then we can try opening in relaxed mode. */ + relaxed = container != drflac_container_unknown; + + /* Skip over any ID3 tags. */ + for (;;) { + if (onRead(pUserData, id, 4) != 4) { + return DRFLAC_FALSE; /* Ran out of data. */ + } + pInit->runningFilePos += 4; + + if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') { + drflac_uint8 header[6]; + drflac_uint8 flags; + drflac_uint32 headerSize; + + if (onRead(pUserData, header, 6) != 6) { + return DRFLAC_FALSE; /* Ran out of data. */ + } + pInit->runningFilePos += 6; + + flags = header[1]; + + DRFLAC_COPY_MEMORY(&headerSize, header+2, 4); + headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize)); + if (flags & 0x10) { + headerSize += 10; + } + + if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) { + return DRFLAC_FALSE; /* Failed to seek past the tag. */ + } + pInit->runningFilePos += headerSize; + } else { + break; + } + } + + if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') { + return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#ifndef DR_FLAC_NO_OGG + if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') { + return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#endif + + /* If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. */ + if (relaxed) { + if (container == drflac_container_native) { + return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#ifndef DR_FLAC_NO_OGG + if (container == drflac_container_ogg) { + return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed); + } +#endif + } + + /* Unsupported container. */ + return DRFLAC_FALSE; +} + +static void drflac__init_from_info(drflac* pFlac, const drflac_init_info* pInit) +{ + DRFLAC_ASSERT(pFlac != NULL); + DRFLAC_ASSERT(pInit != NULL); + + DRFLAC_ZERO_MEMORY(pFlac, sizeof(*pFlac)); + pFlac->bs = pInit->bs; + pFlac->onMeta = pInit->onMeta; + pFlac->pUserDataMD = pInit->pUserDataMD; + pFlac->maxBlockSizeInPCMFrames = pInit->maxBlockSizeInPCMFrames; + pFlac->sampleRate = pInit->sampleRate; + pFlac->channels = (drflac_uint8)pInit->channels; + pFlac->bitsPerSample = (drflac_uint8)pInit->bitsPerSample; + pFlac->totalPCMFrameCount = pInit->totalPCMFrameCount; + pFlac->container = pInit->container; +} + + +static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac_init_info init; + drflac_uint32 allocationSize; + drflac_uint32 wholeSIMDVectorCountPerChannel; + drflac_uint32 decodedSamplesAllocationSize; +#ifndef DR_FLAC_NO_OGG + drflac_oggbs oggbs; +#endif + drflac_uint64 firstFramePos; + drflac_uint64 seektablePos; + drflac_uint32 seektableSize; + drflac_allocation_callbacks allocationCallbacks; + drflac* pFlac; + + /* CPU support first. */ + drflac__init_cpu_caps(); + + if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) { + return NULL; + } + + if (pAllocationCallbacks != NULL) { + allocationCallbacks = *pAllocationCallbacks; + if (allocationCallbacks.onFree == NULL || (allocationCallbacks.onMalloc == NULL && allocationCallbacks.onRealloc == NULL)) { + return NULL; /* Invalid allocation callbacks. */ + } + } else { + allocationCallbacks.pUserData = NULL; + allocationCallbacks.onMalloc = drflac__malloc_default; + allocationCallbacks.onRealloc = drflac__realloc_default; + allocationCallbacks.onFree = drflac__free_default; + } + + + /* + The size of the allocation for the drflac object needs to be large enough to fit the following: + 1) The main members of the drflac structure + 2) A block of memory large enough to store the decoded samples of the largest frame in the stream + 3) If the container is Ogg, a drflac_oggbs object + + The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration + the different SIMD instruction sets. + */ + allocationSize = sizeof(drflac); + + /* + The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector + we are supporting. + */ + if ((init.maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) { + wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))); + } else { + wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1; + } + + decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels; + + allocationSize += decodedSamplesAllocationSize; + allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE; /* Allocate extra bytes to ensure we have enough for alignment. */ + +#ifndef DR_FLAC_NO_OGG + /* There's additional data required for Ogg streams. */ + if (init.container == drflac_container_ogg) { + allocationSize += sizeof(drflac_oggbs); + } + + DRFLAC_ZERO_MEMORY(&oggbs, sizeof(oggbs)); + if (init.container == drflac_container_ogg) { + oggbs.onRead = onRead; + oggbs.onSeek = onSeek; + oggbs.pUserData = pUserData; + oggbs.currentBytePos = init.oggFirstBytePos; + oggbs.firstBytePos = init.oggFirstBytePos; + oggbs.serialNumber = init.oggSerial; + oggbs.bosPageHeader = init.oggBosHeader; + oggbs.bytesRemainingInPage = 0; + } +#endif + + /* + This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to + consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading + and decoding the metadata. + */ + firstFramePos = 42; /* <-- We know we are at byte 42 at this point. */ + seektablePos = 0; + seektableSize = 0; + if (init.hasMetadataBlocks) { + drflac_read_proc onReadOverride = onRead; + drflac_seek_proc onSeekOverride = onSeek; + void* pUserDataOverride = pUserData; + +#ifndef DR_FLAC_NO_OGG + if (init.container == drflac_container_ogg) { + onReadOverride = drflac__on_read_ogg; + onSeekOverride = drflac__on_seek_ogg; + pUserDataOverride = (void*)&oggbs; + } +#endif + + if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) { + return NULL; + } + + allocationSize += seektableSize; + } + + + pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + drflac__init_from_info(pFlac, &init); + pFlac->allocationCallbacks = allocationCallbacks; + pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE); + +#ifndef DR_FLAC_NO_OGG + if (init.container == drflac_container_ogg) { + drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize); + *pInternalOggbs = oggbs; + + /* The Ogg bistream needs to be layered on top of the original bitstream. */ + pFlac->bs.onRead = drflac__on_read_ogg; + pFlac->bs.onSeek = drflac__on_seek_ogg; + pFlac->bs.pUserData = (void*)pInternalOggbs; + pFlac->_oggbs = (void*)pInternalOggbs; + } +#endif + + pFlac->firstFLACFramePosInBytes = firstFramePos; + + /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */ +#ifndef DR_FLAC_NO_OGG + if (init.container == drflac_container_ogg) + { + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + } + else +#endif + { + /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */ + if (seektablePos != 0) { + pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints); + pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize); + + DRFLAC_ASSERT(pFlac->bs.onSeek != NULL); + DRFLAC_ASSERT(pFlac->bs.onRead != NULL); + + /* Seek to the seektable, then just read directly into our seektable buffer. */ + if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) { + if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) { + /* Endian swap. */ + drflac_uint32 iSeekpoint; + for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) { + pFlac->pSeekpoints[iSeekpoint].firstPCMFrame = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame); + pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset); + pFlac->pSeekpoints[iSeekpoint].pcmFrameCount = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount); + } + } else { + /* Failed to read the seektable. Pretend we don't have one. */ + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + } + + /* We need to seek back to where we were. If this fails it's a critical error. */ + if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, drflac_seek_origin_start)) { + drflac__free_from_callbacks(pFlac, &allocationCallbacks); + return NULL; + } + } else { + /* Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. */ + pFlac->pSeekpoints = NULL; + pFlac->seekpointCount = 0; + } + } + } + + + /* + If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode + the first frame. + */ + if (!init.hasStreamInfoBlock) { + pFlac->currentFLACFrame.header = init.firstFrameHeader; + for (;;) { + drflac_result result = drflac__decode_flac_frame(pFlac); + if (result == DRFLAC_SUCCESS) { + break; + } else { + if (result == DRFLAC_CRC_MISMATCH) { + if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) { + drflac__free_from_callbacks(pFlac, &allocationCallbacks); + return NULL; + } + continue; + } else { + drflac__free_from_callbacks(pFlac, &allocationCallbacks); + return NULL; + } + } + } + } + + return pFlac; +} + +static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead) +{ + drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; + size_t bytesRemaining; + + DRFLAC_ASSERT(memoryStream != NULL); + DRFLAC_ASSERT(memoryStream->dataSize >= memoryStream->currentReadPos); + + bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos; + if (bytesToRead > bytesRemaining) { + bytesToRead = bytesRemaining; + } + + if (bytesToRead > 0) { + DRFLAC_COPY_MEMORY(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead); + memoryStream->currentReadPos += bytesToRead; + } + + return bytesToRead; +} + +static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin) +{ + drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData; + + DRFLAC_ASSERT(memoryStream != NULL); + DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */ + + if (offset > (drflac_int64)memoryStream->dataSize) { + return DRFLAC_FALSE; + } + + if (origin == drflac_seek_origin_current) { + if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) { + memoryStream->currentReadPos += offset; + } else { + return DRFLAC_FALSE; /* Trying to seek too far forward. */ + } + } else { + if ((drflac_uint32)offset <= memoryStream->dataSize) { + memoryStream->currentReadPos = offset; + } else { + return DRFLAC_FALSE; /* Trying to seek too far forward. */ + } + } + + return DRFLAC_TRUE; +} + +DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac__memory_stream memoryStream; + drflac* pFlac; + + memoryStream.data = (const drflac_uint8*)pData; + memoryStream.dataSize = dataSize; + memoryStream.currentReadPos = 0; + pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + pFlac->memoryStream = memoryStream; + + /* This is an awful hack... */ +#ifndef DR_FLAC_NO_OGG + if (pFlac->container == drflac_container_ogg) + { + drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + oggbs->pUserData = &pFlac->memoryStream; + } + else +#endif + { + pFlac->bs.pUserData = &pFlac->memoryStream; + } + + return pFlac; +} + +DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac__memory_stream memoryStream; + drflac* pFlac; + + memoryStream.data = (const drflac_uint8*)pData; + memoryStream.dataSize = dataSize; + memoryStream.currentReadPos = 0; + pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + pFlac->memoryStream = memoryStream; + + /* This is an awful hack... */ +#ifndef DR_FLAC_NO_OGG + if (pFlac->container == drflac_container_ogg) + { + drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs; + oggbs->pUserData = &pFlac->memoryStream; + } + else +#endif + { + pFlac->bs.pUserData = &pFlac->memoryStream; + } + + return pFlac; +} + + + +DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return drflac_open_with_metadata_private(onRead, onSeek, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks); +} +DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData, pAllocationCallbacks); +} + +DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks); +} +DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData, pAllocationCallbacks); +} + +DRFLAC_API void drflac_close(drflac* pFlac) +{ + if (pFlac == NULL) { + return; + } + + drflac__free_from_callbacks(pFlac, &pFlac->allocationCallbacks); +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 right0 = left0 - side0; + drflac_uint32 right1 = left1 - side1; + drflac_uint32 right2 = left2 - side2; + drflac_uint32 right3 = left3 - side3; + + pOutputSamples[i*8+0] = (drflac_int32)left0; + pOutputSamples[i*8+1] = (drflac_int32)right0; + pOutputSamples[i*8+2] = (drflac_int32)left1; + pOutputSamples[i*8+3] = (drflac_int32)right1; + pOutputSamples[i*8+4] = (drflac_int32)left2; + pOutputSamples[i*8+5] = (drflac_int32)right2; + pOutputSamples[i*8+6] = (drflac_int32)left3; + pOutputSamples[i*8+7] = (drflac_int32)right3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i right = _mm_sub_epi32(left, side); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t left; + uint32x4_t side; + uint32x4_t right; + + left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + right = vsubq_u32(left, side); + + drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_s32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 side0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 side1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 side2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 side3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 left0 = right0 + side0; + drflac_uint32 left1 = right1 + side1; + drflac_uint32 left2 = right2 + side2; + drflac_uint32 left3 = right3 + side3; + + pOutputSamples[i*8+0] = (drflac_int32)left0; + pOutputSamples[i*8+1] = (drflac_int32)right0; + pOutputSamples[i*8+2] = (drflac_int32)left1; + pOutputSamples[i*8+3] = (drflac_int32)right1; + pOutputSamples[i*8+4] = (drflac_int32)left2; + pOutputSamples[i*8+5] = (drflac_int32)right2; + pOutputSamples[i*8+6] = (drflac_int32)left3; + pOutputSamples[i*8+7] = (drflac_int32)right3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + for (i = 0; i < frameCount4; ++i) { + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i left = _mm_add_epi32(right, side); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t side; + uint32x4_t right; + uint32x4_t left; + + side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + left = vaddq_u32(right, side); + + drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left; + pOutputSamples[i*2+1] = (drflac_int32)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_s32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_int32 shift = unusedBitsPerSample; + + if (shift > 0) { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = (mid0 + side0) << shift; + temp1L = (mid1 + side1) << shift; + temp2L = (mid2 + side2) << shift; + temp3L = (mid3 + side3) << shift; + + temp0R = (mid0 - side0) << shift; + temp1R = (mid1 - side1) << shift; + temp2R = (mid2 - side2) << shift; + temp3R = (mid3 - side3) << shift; + + pOutputSamples[i*8+0] = (drflac_int32)temp0L; + pOutputSamples[i*8+1] = (drflac_int32)temp0R; + pOutputSamples[i*8+2] = (drflac_int32)temp1L; + pOutputSamples[i*8+3] = (drflac_int32)temp1R; + pOutputSamples[i*8+4] = (drflac_int32)temp2L; + pOutputSamples[i*8+5] = (drflac_int32)temp2R; + pOutputSamples[i*8+6] = (drflac_int32)temp3L; + pOutputSamples[i*8+7] = (drflac_int32)temp3R; + } + } else { + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1); + temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1); + temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1); + temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1); + + temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1); + temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1); + temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1); + temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1); + + pOutputSamples[i*8+0] = (drflac_int32)temp0L; + pOutputSamples[i*8+1] = (drflac_int32)temp0R; + pOutputSamples[i*8+2] = (drflac_int32)temp1L; + pOutputSamples[i*8+3] = (drflac_int32)temp1R; + pOutputSamples[i*8+4] = (drflac_int32)temp2L; + pOutputSamples[i*8+5] = (drflac_int32)temp2R; + pOutputSamples[i*8+6] = (drflac_int32)temp3L; + pOutputSamples[i*8+7] = (drflac_int32)temp3R; + } + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample); + pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_int32 shift = unusedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + left = _mm_srai_epi32(_mm_add_epi32(mid, side), 1); + right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1; + pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1; + } + } else { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); + right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift); + pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift); + } + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_int32 shift = unusedBitsPerSample; + int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */ + int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */ + uint32x4_t one4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + one4 = vdupq_n_u32(1); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + uint32x4_t mid; + uint32x4_t side; + int32x4_t left; + int32x4_t right; + + mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4)); + + left = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1); + right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1); + + drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1; + pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1; + } + } else { + int32x4_t shift4; + + shift -= 1; + shift4 = vdupq_n_s32(shift); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t mid; + uint32x4_t side; + int32x4_t left; + int32x4_t right; + + mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4)); + + left = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4)); + right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4)); + + drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift); + pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift); + } + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_s32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1; + + pOutputSamples[i*8+0] = (drflac_int32)tempL0; + pOutputSamples[i*8+1] = (drflac_int32)tempR0; + pOutputSamples[i*8+2] = (drflac_int32)tempL1; + pOutputSamples[i*8+3] = (drflac_int32)tempR1; + pOutputSamples[i*8+4] = (drflac_int32)tempL2; + pOutputSamples[i*8+5] = (drflac_int32)tempR2; + pOutputSamples[i*8+6] = (drflac_int32)tempL3; + pOutputSamples[i*8+7] = (drflac_int32)tempR3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0); + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right)); + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0); + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1); + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + int32x4_t shift4_0 = vdupq_n_s32(shift0); + int32x4_t shift4_1 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + int32x4_t left; + int32x4_t right; + + left = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift4_0)); + right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift4_1)); + + drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0); + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + + +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut) +{ + drflac_uint64 framesRead; + drflac_uint32 unusedBitsPerSample; + + if (pFlac == NULL || framesToRead == 0) { + return 0; + } + + if (pBufferOut == NULL) { + return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); + } + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 32); + unusedBitsPerSample = 32 - pFlac->bitsPerSample; + + framesRead = 0; + while (framesToRead > 0) { + /* If we've run out of samples in this frame, go to the next. */ + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. */ + } + } else { + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; + drflac_uint64 frameCountThisIteration = framesToRead; + + if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { + frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; + } + + if (channelCount == 2) { + const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; + const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; + + switch (pFlac->currentFLACFrame.header.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + drflac_read_pcm_frames_s32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + drflac_read_pcm_frames_s32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + drflac_read_pcm_frames_s32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + drflac_read_pcm_frames_s32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + } + } else { + /* Generic interleaving. */ + drflac_uint64 i; + for (i = 0; i < frameCountThisIteration; ++i) { + unsigned int j; + for (j = 0; j < channelCount; ++j) { + pBufferOut[(i*channelCount)+j] = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)); + } + } + } + + framesRead += frameCountThisIteration; + pBufferOut += frameCountThisIteration * channelCount; + framesToRead -= frameCountThisIteration; + pFlac->currentPCMFrame += frameCountThisIteration; + pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration; + } + } + + return framesRead; +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 right0 = left0 - side0; + drflac_uint32 right1 = left1 - side1; + drflac_uint32 right2 = left2 - side2; + drflac_uint32 right3 = left3 - side3; + + left0 >>= 16; + left1 >>= 16; + left2 >>= 16; + left3 >>= 16; + + right0 >>= 16; + right1 >>= 16; + right2 >>= 16; + right3 >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)left0; + pOutputSamples[i*8+1] = (drflac_int16)right0; + pOutputSamples[i*8+2] = (drflac_int16)left1; + pOutputSamples[i*8+3] = (drflac_int16)right1; + pOutputSamples[i*8+4] = (drflac_int16)left2; + pOutputSamples[i*8+5] = (drflac_int16)right2; + pOutputSamples[i*8+6] = (drflac_int16)left3; + pOutputSamples[i*8+7] = (drflac_int16)right3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i right = _mm_sub_epi32(left, side); + + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t left; + uint32x4_t side; + uint32x4_t right; + + left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + right = vsubq_u32(left, side); + + left = vshrq_n_u32(left, 16); + right = vshrq_n_u32(right, 16); + + drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right))); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_s16__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 side0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 side1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 side2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 side3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 left0 = right0 + side0; + drflac_uint32 left1 = right1 + side1; + drflac_uint32 left2 = right2 + side2; + drflac_uint32 left3 = right3 + side3; + + left0 >>= 16; + left1 >>= 16; + left2 >>= 16; + left3 >>= 16; + + right0 >>= 16; + right1 >>= 16; + right2 >>= 16; + right3 >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)left0; + pOutputSamples[i*8+1] = (drflac_int16)right0; + pOutputSamples[i*8+2] = (drflac_int16)left1; + pOutputSamples[i*8+3] = (drflac_int16)right1; + pOutputSamples[i*8+4] = (drflac_int16)left2; + pOutputSamples[i*8+5] = (drflac_int16)right2; + pOutputSamples[i*8+6] = (drflac_int16)left3; + pOutputSamples[i*8+7] = (drflac_int16)right3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + for (i = 0; i < frameCount4; ++i) { + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i left = _mm_add_epi32(right, side); + + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t side; + uint32x4_t right; + uint32x4_t left; + + side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + left = vaddq_u32(right, side); + + left = vshrq_n_u32(left, 16); + right = vshrq_n_u32(right, 16); + + drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right))); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + left >>= 16; + right >>= 16; + + pOutputSamples[i*2+0] = (drflac_int16)left; + pOutputSamples[i*2+1] = (drflac_int16)right; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_s16__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample; + + if (shift > 0) { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = (mid0 + side0) << shift; + temp1L = (mid1 + side1) << shift; + temp2L = (mid2 + side2) << shift; + temp3L = (mid3 + side3) << shift; + + temp0R = (mid0 - side0) << shift; + temp1R = (mid1 - side1) << shift; + temp2R = (mid2 - side2) << shift; + temp3R = (mid3 - side3) << shift; + + temp0L >>= 16; + temp1L >>= 16; + temp2L >>= 16; + temp3L >>= 16; + + temp0R >>= 16; + temp1R >>= 16; + temp2R >>= 16; + temp3R >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)temp0L; + pOutputSamples[i*8+1] = (drflac_int16)temp0R; + pOutputSamples[i*8+2] = (drflac_int16)temp1L; + pOutputSamples[i*8+3] = (drflac_int16)temp1R; + pOutputSamples[i*8+4] = (drflac_int16)temp2L; + pOutputSamples[i*8+5] = (drflac_int16)temp2R; + pOutputSamples[i*8+6] = (drflac_int16)temp3L; + pOutputSamples[i*8+7] = (drflac_int16)temp3R; + } + } else { + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = ((drflac_int32)(mid0 + side0) >> 1); + temp1L = ((drflac_int32)(mid1 + side1) >> 1); + temp2L = ((drflac_int32)(mid2 + side2) >> 1); + temp3L = ((drflac_int32)(mid3 + side3) >> 1); + + temp0R = ((drflac_int32)(mid0 - side0) >> 1); + temp1R = ((drflac_int32)(mid1 - side1) >> 1); + temp2R = ((drflac_int32)(mid2 - side2) >> 1); + temp3R = ((drflac_int32)(mid3 - side3) >> 1); + + temp0L >>= 16; + temp1L >>= 16; + temp2L >>= 16; + temp3L >>= 16; + + temp0R >>= 16; + temp1R >>= 16; + temp2R >>= 16; + temp3R >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)temp0L; + pOutputSamples[i*8+1] = (drflac_int16)temp0R; + pOutputSamples[i*8+2] = (drflac_int16)temp1L; + pOutputSamples[i*8+3] = (drflac_int16)temp1R; + pOutputSamples[i*8+4] = (drflac_int16)temp2L; + pOutputSamples[i*8+5] = (drflac_int16)temp2R; + pOutputSamples[i*8+6] = (drflac_int16)temp3L; + pOutputSamples[i*8+7] = (drflac_int16)temp3R; + } + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + left = _mm_srai_epi32(_mm_add_epi32(mid, side), 1); + right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); + + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16); + } + } else { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i left; + __m128i right; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + left = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); + right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); + + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); + + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16); + } + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample; + int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */ + int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */ + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + uint32x4_t mid; + uint32x4_t side; + int32x4_t left; + int32x4_t right; + + mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1))); + + left = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1); + right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1); + + left = vshrq_n_s32(left, 16); + right = vshrq_n_s32(right, 16); + + drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16); + } + } else { + int32x4_t shift4; + + shift -= 1; + shift4 = vdupq_n_s32(shift); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t mid; + uint32x4_t side; + int32x4_t left; + int32x4_t right; + + mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1))); + + left = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4)); + right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4)); + + left = vshrq_n_s32(left, 16); + right = vshrq_n_s32(right, 16); + + drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16); + } + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_s16__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1; + + tempL0 >>= 16; + tempL1 >>= 16; + tempL2 >>= 16; + tempL3 >>= 16; + + tempR0 >>= 16; + tempR1 >>= 16; + tempR2 >>= 16; + tempR3 >>= 16; + + pOutputSamples[i*8+0] = (drflac_int16)tempL0; + pOutputSamples[i*8+1] = (drflac_int16)tempR0; + pOutputSamples[i*8+2] = (drflac_int16)tempL1; + pOutputSamples[i*8+3] = (drflac_int16)tempR1; + pOutputSamples[i*8+4] = (drflac_int16)tempL2; + pOutputSamples[i*8+5] = (drflac_int16)tempR2; + pOutputSamples[i*8+6] = (drflac_int16)tempL3; + pOutputSamples[i*8+7] = (drflac_int16)tempR3; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16); + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + + left = _mm_srai_epi32(left, 16); + right = _mm_srai_epi32(right, 16); + + /* At this point we have results. We can now pack and interleave these into a single __m128i object and then store the in the output buffer. */ + _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16); + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + int32x4_t shift0_4 = vdupq_n_s32(shift0); + int32x4_t shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + int32x4_t left; + int32x4_t right; + + left = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4)); + right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4)); + + left = vshrq_n_s32(left, 16); + right = vshrq_n_s32(right, 16); + + drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right))); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16); + pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16); + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut) +{ + drflac_uint64 framesRead; + drflac_uint32 unusedBitsPerSample; + + if (pFlac == NULL || framesToRead == 0) { + return 0; + } + + if (pBufferOut == NULL) { + return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); + } + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 32); + unusedBitsPerSample = 32 - pFlac->bitsPerSample; + + framesRead = 0; + while (framesToRead > 0) { + /* If we've run out of samples in this frame, go to the next. */ + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. */ + } + } else { + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; + drflac_uint64 frameCountThisIteration = framesToRead; + + if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { + frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; + } + + if (channelCount == 2) { + const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; + const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; + + switch (pFlac->currentFLACFrame.header.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + drflac_read_pcm_frames_s16__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + drflac_read_pcm_frames_s16__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + drflac_read_pcm_frames_s16__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + drflac_read_pcm_frames_s16__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + } + } else { + /* Generic interleaving. */ + drflac_uint64 i; + for (i = 0; i < frameCountThisIteration; ++i) { + unsigned int j; + for (j = 0; j < channelCount; ++j) { + drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)); + pBufferOut[(i*channelCount)+j] = (drflac_int16)(sampleS32 >> 16); + } + } + } + + framesRead += frameCountThisIteration; + pBufferOut += frameCountThisIteration * channelCount; + framesToRead -= frameCountThisIteration; + pFlac->currentPCMFrame += frameCountThisIteration; + pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration; + } + } + + return framesRead; +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + float factor = 1 / 2147483648.0; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 right0 = left0 - side0; + drflac_uint32 right1 = left1 - side1; + drflac_uint32 right2 = left2 - side2; + drflac_uint32 right3 = left3 - side3; + + pOutputSamples[i*8+0] = (drflac_int32)left0 * factor; + pOutputSamples[i*8+1] = (drflac_int32)right0 * factor; + pOutputSamples[i*8+2] = (drflac_int32)left1 * factor; + pOutputSamples[i*8+3] = (drflac_int32)right1 * factor; + pOutputSamples[i*8+4] = (drflac_int32)left2 * factor; + pOutputSamples[i*8+5] = (drflac_int32)right2 * factor; + pOutputSamples[i*8+6] = (drflac_int32)left3 * factor; + pOutputSamples[i*8+7] = (drflac_int32)right3 * factor; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left * factor; + pOutputSamples[i*2+1] = (drflac_int32)right * factor; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + __m128 factor; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor = _mm_set1_ps(1.0f / 8388608.0f); + + for (i = 0; i < frameCount4; ++i) { + __m128i left = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i right = _mm_sub_epi32(left, side); + __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor); + __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left / 8388608.0f; + pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + float32x4_t factor4; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor4 = vdupq_n_f32(1.0f / 8388608.0f); + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t left; + uint32x4_t side; + uint32x4_t right; + float32x4_t leftf; + float32x4_t rightf; + + left = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + right = vsubq_u32(left, side); + leftf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 left = pInputSamples0U32[i] << shift0; + drflac_uint32 side = pInputSamples1U32[i] << shift1; + drflac_uint32 right = left - side; + + pOutputSamples[i*2+0] = (drflac_int32)left / 8388608.0f; + pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + float factor = 1 / 2147483648.0; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 side0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 side1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 side2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 side3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1; + + drflac_uint32 left0 = right0 + side0; + drflac_uint32 left1 = right1 + side1; + drflac_uint32 left2 = right2 + side2; + drflac_uint32 left3 = right3 + side3; + + pOutputSamples[i*8+0] = (drflac_int32)left0 * factor; + pOutputSamples[i*8+1] = (drflac_int32)right0 * factor; + pOutputSamples[i*8+2] = (drflac_int32)left1 * factor; + pOutputSamples[i*8+3] = (drflac_int32)right1 * factor; + pOutputSamples[i*8+4] = (drflac_int32)left2 * factor; + pOutputSamples[i*8+5] = (drflac_int32)right2 * factor; + pOutputSamples[i*8+6] = (drflac_int32)left3 * factor; + pOutputSamples[i*8+7] = (drflac_int32)right3 * factor; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left * factor; + pOutputSamples[i*2+1] = (drflac_int32)right * factor; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + __m128 factor; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor = _mm_set1_ps(1.0f / 8388608.0f); + + for (i = 0; i < frameCount4; ++i) { + __m128i side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + __m128i left = _mm_add_epi32(right, side); + __m128 leftf = _mm_mul_ps(_mm_cvtepi32_ps(left), factor); + __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left / 8388608.0f; + pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + float32x4_t factor4; + int32x4_t shift0_4; + int32x4_t shift1_4; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor4 = vdupq_n_f32(1.0f / 8388608.0f); + shift0_4 = vdupq_n_s32(shift0); + shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + uint32x4_t side; + uint32x4_t right; + uint32x4_t left; + float32x4_t leftf; + float32x4_t rightf; + + side = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4); + right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4); + left = vaddq_u32(right, side); + leftf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 side = pInputSamples0U32[i] << shift0; + drflac_uint32 right = pInputSamples1U32[i] << shift1; + drflac_uint32 left = right + side; + + pOutputSamples[i*2+0] = (drflac_int32)left / 8388608.0f; + pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample; + float factor = 1 / 2147483648.0; + + if (shift > 0) { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = (mid0 + side0) << shift; + temp1L = (mid1 + side1) << shift; + temp2L = (mid2 + side2) << shift; + temp3L = (mid3 + side3) << shift; + + temp0R = (mid0 - side0) << shift; + temp1R = (mid1 - side1) << shift; + temp2R = (mid2 - side2) << shift; + temp3R = (mid3 - side3) << shift; + + pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor; + pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor; + pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor; + pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor; + pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor; + pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor; + pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor; + pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor; + } + } else { + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 temp0L; + drflac_uint32 temp1L; + drflac_uint32 temp2L; + drflac_uint32 temp3L; + drflac_uint32 temp0R; + drflac_uint32 temp1R; + drflac_uint32 temp2R; + drflac_uint32 temp3R; + + drflac_uint32 mid0 = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid1 = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid2 = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 mid3 = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + + drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid0 = (mid0 << 1) | (side0 & 0x01); + mid1 = (mid1 << 1) | (side1 & 0x01); + mid2 = (mid2 << 1) | (side2 & 0x01); + mid3 = (mid3 << 1) | (side3 & 0x01); + + temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1); + temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1); + temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1); + temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1); + + temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1); + temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1); + temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1); + temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1); + + pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor; + pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor; + pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor; + pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor; + pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor; + pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor; + pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor; + pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor; + } + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) * factor; + pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) * factor; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample - 8; + float factor; + __m128 factor128; + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor = 1.0f / 8388608.0f; + factor128 = _mm_set1_ps(factor); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i tempL; + __m128i tempR; + __m128 leftf; + __m128 rightf; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + tempL = _mm_srai_epi32(_mm_add_epi32(mid, side), 1); + tempR = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1); + + leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128); + rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor; + pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor; + } + } else { + shift -= 1; + for (i = 0; i < frameCount4; ++i) { + __m128i mid; + __m128i side; + __m128i tempL; + __m128i tempR; + __m128 leftf; + __m128 rightf; + + mid = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + side = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + mid = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01))); + + tempL = _mm_slli_epi32(_mm_add_epi32(mid, side), shift); + tempR = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift); + + leftf = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128); + rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor; + pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor; + } + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift = unusedBitsPerSample - 8; + float factor; + float32x4_t factor4; + int32x4_t shift4; + int32x4_t wbps0_4; /* Wasted Bits Per Sample */ + int32x4_t wbps1_4; /* Wasted Bits Per Sample */ + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 24); + + factor = 1.0f / 8388608.0f; + factor4 = vdupq_n_f32(factor); + wbps0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample); + wbps1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample); + + if (shift == 0) { + for (i = 0; i < frameCount4; ++i) { + int32x4_t lefti; + int32x4_t righti; + float32x4_t leftf; + float32x4_t rightf; + + uint32x4_t mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4); + uint32x4_t side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1))); + + lefti = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1); + righti = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1); + + leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor; + pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor; + } + } else { + shift -= 1; + shift4 = vdupq_n_s32(shift); + for (i = 0; i < frameCount4; ++i) { + uint32x4_t mid; + uint32x4_t side; + int32x4_t lefti; + int32x4_t righti; + float32x4_t leftf; + float32x4_t rightf; + + mid = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4); + side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4); + + mid = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1))); + + lefti = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4)); + righti = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4)); + + leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + drflac_uint32 mid = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + + mid = (mid << 1) | (side & 0x01); + + pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor; + pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor; + } + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample; + drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample; + float factor = 1 / 2147483648.0; + + for (i = 0; i < frameCount4; ++i) { + drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0; + drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0; + drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0; + drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0; + + drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1; + drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1; + drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1; + drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1; + + pOutputSamples[i*8+0] = (drflac_int32)tempL0 * factor; + pOutputSamples[i*8+1] = (drflac_int32)tempR0 * factor; + pOutputSamples[i*8+2] = (drflac_int32)tempL1 * factor; + pOutputSamples[i*8+3] = (drflac_int32)tempR1 * factor; + pOutputSamples[i*8+4] = (drflac_int32)tempL2 * factor; + pOutputSamples[i*8+5] = (drflac_int32)tempR2 * factor; + pOutputSamples[i*8+6] = (drflac_int32)tempL3 * factor; + pOutputSamples[i*8+7] = (drflac_int32)tempR3 * factor; + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor; + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor; + } +} + +#if defined(DRFLAC_SUPPORT_SSE2) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + + float factor = 1.0f / 8388608.0f; + __m128 factor128 = _mm_set1_ps(factor); + + for (i = 0; i < frameCount4; ++i) { + __m128i lefti; + __m128i righti; + __m128 leftf; + __m128 rightf; + + lefti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0); + righti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1); + + leftf = _mm_mul_ps(_mm_cvtepi32_ps(lefti), factor128); + rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128); + + _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf)); + _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor; + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor; + } +} +#endif + +#if defined(DRFLAC_SUPPORT_NEON) +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ + drflac_uint64 i; + drflac_uint64 frameCount4 = frameCount >> 2; + const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0; + const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1; + drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8; + drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8; + + float factor = 1.0f / 8388608.0f; + float32x4_t factor4 = vdupq_n_f32(factor); + int32x4_t shift0_4 = vdupq_n_s32(shift0); + int32x4_t shift1_4 = vdupq_n_s32(shift1); + + for (i = 0; i < frameCount4; ++i) { + int32x4_t lefti; + int32x4_t righti; + float32x4_t leftf; + float32x4_t rightf; + + lefti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4)); + righti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4)); + + leftf = vmulq_f32(vcvtq_f32_s32(lefti), factor4); + rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4); + + drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf)); + } + + for (i = (frameCount4 << 2); i < frameCount; ++i) { + pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor; + pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor; + } +} +#endif + +static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples) +{ +#if defined(DRFLAC_SUPPORT_SSE2) + if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#elif defined(DRFLAC_SUPPORT_NEON) + if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) { + drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } else +#endif + { + /* Scalar fallback. */ + drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples); + } +} + +DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut) +{ + drflac_uint64 framesRead; + drflac_uint32 unusedBitsPerSample; + + if (pFlac == NULL || framesToRead == 0) { + return 0; + } + + if (pBufferOut == NULL) { + return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead); + } + + DRFLAC_ASSERT(pFlac->bitsPerSample <= 32); + unusedBitsPerSample = 32 - pFlac->bitsPerSample; + + framesRead = 0; + while (framesToRead > 0) { + /* If we've run out of samples in this frame, go to the next. */ + if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) { + if (!drflac__read_and_decode_next_flac_frame(pFlac)) { + break; /* Couldn't read the next frame, so just break from the loop and return. */ + } + } else { + unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment); + drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining; + drflac_uint64 frameCountThisIteration = framesToRead; + + if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) { + frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining; + } + + if (channelCount == 2) { + const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame; + const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame; + + switch (pFlac->currentFLACFrame.header.channelAssignment) + { + case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE: + { + drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE: + { + drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE: + { + drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + + case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT: + default: + { + drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut); + } break; + } + } else { + /* Generic interleaving. */ + drflac_uint64 i; + for (i = 0; i < frameCountThisIteration; ++i) { + unsigned int j; + for (j = 0; j < channelCount; ++j) { + drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample)); + pBufferOut[(i*channelCount)+j] = (float)(sampleS32 / 2147483648.0); + } + } + } + + framesRead += frameCountThisIteration; + pBufferOut += frameCountThisIteration * channelCount; + framesToRead -= frameCountThisIteration; + pFlac->currentPCMFrame += frameCountThisIteration; + pFlac->currentFLACFrame.pcmFramesRemaining -= (unsigned int)frameCountThisIteration; + } + } + + return framesRead; +} + + +DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex) +{ + if (pFlac == NULL) { + return DRFLAC_FALSE; + } + + /* Don't do anything if we're already on the seek point. */ + if (pFlac->currentPCMFrame == pcmFrameIndex) { + return DRFLAC_TRUE; + } + + /* + If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present + when the decoder was opened. + */ + if (pFlac->firstFLACFramePosInBytes == 0) { + return DRFLAC_FALSE; + } + + if (pcmFrameIndex == 0) { + pFlac->currentPCMFrame = 0; + return drflac__seek_to_first_frame(pFlac); + } else { + drflac_bool32 wasSuccessful = DRFLAC_FALSE; + + /* Clamp the sample to the end. */ + if (pcmFrameIndex > pFlac->totalPCMFrameCount) { + pcmFrameIndex = pFlac->totalPCMFrameCount; + } + + /* If the target sample and the current sample are in the same frame we just move the position forward. */ + if (pcmFrameIndex > pFlac->currentPCMFrame) { + /* Forward. */ + drflac_uint32 offset = (drflac_uint32)(pcmFrameIndex - pFlac->currentPCMFrame); + if (pFlac->currentFLACFrame.pcmFramesRemaining > offset) { + pFlac->currentFLACFrame.pcmFramesRemaining -= offset; + pFlac->currentPCMFrame = pcmFrameIndex; + return DRFLAC_TRUE; + } + } else { + /* Backward. */ + drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentPCMFrame - pcmFrameIndex); + drflac_uint32 currentFLACFramePCMFrameCount = pFlac->currentFLACFrame.header.blockSizeInPCMFrames; + drflac_uint32 currentFLACFramePCMFramesConsumed = currentFLACFramePCMFrameCount - pFlac->currentFLACFrame.pcmFramesRemaining; + if (currentFLACFramePCMFramesConsumed > offsetAbs) { + pFlac->currentFLACFrame.pcmFramesRemaining += offsetAbs; + pFlac->currentPCMFrame = pcmFrameIndex; + return DRFLAC_TRUE; + } + } + + /* + Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so + we'll instead use Ogg's natural seeking facility. + */ +#ifndef DR_FLAC_NO_OGG + if (pFlac->container == drflac_container_ogg) + { + wasSuccessful = drflac_ogg__seek_to_pcm_frame(pFlac, pcmFrameIndex); + } + else +#endif + { + /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */ + if (/*!wasSuccessful && */!pFlac->_noSeekTableSeek) { + wasSuccessful = drflac__seek_to_pcm_frame__seek_table(pFlac, pcmFrameIndex); + } + +#if !defined(DR_FLAC_NO_CRC) + /* Fall back to binary search if seek table seeking fails. This requires the length of the stream to be known. */ + if (!wasSuccessful && !pFlac->_noBinarySearchSeek && pFlac->totalPCMFrameCount > 0) { + wasSuccessful = drflac__seek_to_pcm_frame__binary_search(pFlac, pcmFrameIndex); + } +#endif + + /* Fall back to brute force if all else fails. */ + if (!wasSuccessful && !pFlac->_noBruteForceSeek) { + wasSuccessful = drflac__seek_to_pcm_frame__brute_force(pFlac, pcmFrameIndex); + } + } + + pFlac->currentPCMFrame = pcmFrameIndex; + return wasSuccessful; + } +} + + + +/* High Level APIs */ + +#if defined(SIZE_MAX) + #define DRFLAC_SIZE_MAX SIZE_MAX +#else + #if defined(DRFLAC_64BIT) + #define DRFLAC_SIZE_MAX ((drflac_uint64)0xFFFFFFFFFFFFFFFF) + #else + #define DRFLAC_SIZE_MAX 0xFFFFFFFF + #endif +#endif + + +/* Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. */ +#define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \ +static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\ +{ \ + type* pSampleData = NULL; \ + drflac_uint64 totalPCMFrameCount; \ + \ + DRFLAC_ASSERT(pFlac != NULL); \ + \ + totalPCMFrameCount = pFlac->totalPCMFrameCount; \ + \ + if (totalPCMFrameCount == 0) { \ + type buffer[4096]; \ + drflac_uint64 pcmFramesRead; \ + size_t sampleDataBufferSize = sizeof(buffer); \ + \ + pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks); \ + if (pSampleData == NULL) { \ + goto on_error; \ + } \ + \ + while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) { \ + if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) { \ + type* pNewSampleData; \ + size_t newSampleDataBufferSize; \ + \ + newSampleDataBufferSize = sampleDataBufferSize * 2; \ + pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, &pFlac->allocationCallbacks); \ + if (pNewSampleData == NULL) { \ + drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks); \ + goto on_error; \ + } \ + \ + sampleDataBufferSize = newSampleDataBufferSize; \ + pSampleData = pNewSampleData; \ + } \ + \ + DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type))); \ + totalPCMFrameCount += pcmFramesRead; \ + } \ + \ + /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to \ + protect those ears from random noise! */ \ + DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type))); \ + } else { \ + drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type); \ + if (dataSize > DRFLAC_SIZE_MAX) { \ + goto on_error; /* The decoded data is too big. */ \ + } \ + \ + pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks); /* <-- Safe cast as per the check above. */ \ + if (pSampleData == NULL) { \ + goto on_error; \ + } \ + \ + totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData); \ + } \ + \ + if (sampleRateOut) *sampleRateOut = pFlac->sampleRate; \ + if (channelsOut) *channelsOut = pFlac->channels; \ + if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount; \ + \ + drflac_close(pFlac); \ + return pSampleData; \ + \ +on_error: \ + drflac_close(pFlac); \ + return NULL; \ +} + +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32) +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16) +DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, float) + +DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalPCMFrameCountOut) { + *totalPCMFrameCountOut = 0; + } + + pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); +} + +DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalPCMFrameCountOut) { + *totalPCMFrameCountOut = 0; + } + + pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); +} + +DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (channelsOut) { + *channelsOut = 0; + } + if (sampleRateOut) { + *sampleRateOut = 0; + } + if (totalPCMFrameCountOut) { + *totalPCMFrameCountOut = 0; + } + + pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut); +} + +DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount); +} + +DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount); +} + +DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + drflac* pFlac; + + if (sampleRate) { + *sampleRate = 0; + } + if (channels) { + *channels = 0; + } + if (totalPCMFrameCount) { + *totalPCMFrameCount = 0; + } + + pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks); + if (pFlac == NULL) { + return NULL; + } + + return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount); +} + + +DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks) +{ + if (pAllocationCallbacks != NULL) { + drflac__free_from_callbacks(p, pAllocationCallbacks); + } else { + drflac__free_default(p, NULL); + } +} + + + + +DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments) +{ + if (pIter == NULL) { + return; + } + + pIter->countRemaining = commentCount; + pIter->pRunningData = (const char*)pComments; +} + +DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut) +{ + drflac_int32 length; + const char* pComment; + + /* Safety. */ + if (pCommentLengthOut) { + *pCommentLengthOut = 0; + } + + if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) { + return NULL; + } + + length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData); + pIter->pRunningData += 4; + + pComment = pIter->pRunningData; + pIter->pRunningData += length; + pIter->countRemaining -= 1; + + if (pCommentLengthOut) { + *pCommentLengthOut = length; + } + + return pComment; +} + + + + +DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData) +{ + if (pIter == NULL) { + return; + } + + pIter->countRemaining = trackCount; + pIter->pRunningData = (const char*)pTrackData; +} + +DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack) +{ + drflac_cuesheet_track cuesheetTrack; + const char* pRunningData; + drflac_uint64 offsetHi; + drflac_uint64 offsetLo; + + if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) { + return DRFLAC_FALSE; + } + + pRunningData = pIter->pRunningData; + + offsetHi = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + offsetLo = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4; + cuesheetTrack.offset = offsetLo | (offsetHi << 32); + cuesheetTrack.trackNumber = pRunningData[0]; pRunningData += 1; + DRFLAC_COPY_MEMORY(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC)); pRunningData += 12; + cuesheetTrack.isAudio = (pRunningData[0] & 0x80) != 0; + cuesheetTrack.preEmphasis = (pRunningData[0] & 0x40) != 0; pRunningData += 14; + cuesheetTrack.indexCount = pRunningData[0]; pRunningData += 1; + cuesheetTrack.pIndexPoints = (const drflac_cuesheet_track_index*)pRunningData; pRunningData += cuesheetTrack.indexCount * sizeof(drflac_cuesheet_track_index); + + pIter->pRunningData = pRunningData; + pIter->countRemaining -= 1; + + if (pCuesheetTrack) { + *pCuesheetTrack = cuesheetTrack; + } + + return DRFLAC_TRUE; +} + +#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))) + #pragma GCC diagnostic pop +#endif +#endif /* dr_flac_c */ +#endif /* DR_FLAC_IMPLEMENTATION */ + + +/* +REVISION HISTORY +================ +v0.12.28 - 2021-02-21 + - Fix a warning due to referencing _MSC_VER when it is undefined. + +v0.12.27 - 2021-01-31 + - Fix a static analysis warning. + +v0.12.26 - 2021-01-17 + - Fix a compilation warning due to _BSD_SOURCE being deprecated. + +v0.12.25 - 2020-12-26 + - Update documentation. + +v0.12.24 - 2020-11-29 + - Fix ARM64/NEON detection when compiling with MSVC. + +v0.12.23 - 2020-11-21 + - Fix compilation with OpenWatcom. + +v0.12.22 - 2020-11-01 + - Fix an error with the previous release. + +v0.12.21 - 2020-11-01 + - Fix a possible deadlock when seeking. + - Improve compiler support for older versions of GCC. + +v0.12.20 - 2020-09-08 + - Fix a compilation error on older compilers. + +v0.12.19 - 2020-08-30 + - Fix a bug due to an undefined 32-bit shift. + +v0.12.18 - 2020-08-14 + - Fix a crash when compiling with clang-cl. + +v0.12.17 - 2020-08-02 + - Simplify sized types. + +v0.12.16 - 2020-07-25 + - Fix a compilation warning. + +v0.12.15 - 2020-07-06 + - Check for negative LPC shifts and return an error. + +v0.12.14 - 2020-06-23 + - Add include guard for the implementation section. + +v0.12.13 - 2020-05-16 + - Add compile-time and run-time version querying. + - DRFLAC_VERSION_MINOR + - DRFLAC_VERSION_MAJOR + - DRFLAC_VERSION_REVISION + - DRFLAC_VERSION_STRING + - drflac_version() + - drflac_version_string() + +v0.12.12 - 2020-04-30 + - Fix compilation errors with VC6. + +v0.12.11 - 2020-04-19 + - Fix some pedantic warnings. + - Fix some undefined behaviour warnings. + +v0.12.10 - 2020-04-10 + - Fix some bugs when trying to seek with an invalid seek table. + +v0.12.9 - 2020-04-05 + - Fix warnings. + +v0.12.8 - 2020-04-04 + - Add drflac_open_file_w() and drflac_open_file_with_metadata_w(). + - Fix some static analysis warnings. + - Minor documentation updates. + +v0.12.7 - 2020-03-14 + - Fix compilation errors with VC6. + +v0.12.6 - 2020-03-07 + - Fix compilation error with Visual Studio .NET 2003. + +v0.12.5 - 2020-01-30 + - Silence some static analysis warnings. + +v0.12.4 - 2020-01-29 + - Silence some static analysis warnings. + +v0.12.3 - 2019-12-02 + - Fix some warnings when compiling with GCC and the -Og flag. + - Fix a crash in out-of-memory situations. + - Fix potential integer overflow bug. + - Fix some static analysis warnings. + - Fix a possible crash when using custom memory allocators without a custom realloc() implementation. + - Fix a bug with binary search seeking where the bits per sample is not a multiple of 8. + +v0.12.2 - 2019-10-07 + - Internal code clean up. + +v0.12.1 - 2019-09-29 + - Fix some Clang Static Analyzer warnings. + - Fix an unused variable warning. + +v0.12.0 - 2019-09-23 + - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation + routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs: + - drflac_open() + - drflac_open_relaxed() + - drflac_open_with_metadata() + - drflac_open_with_metadata_relaxed() + - drflac_open_file() + - drflac_open_file_with_metadata() + - drflac_open_memory() + - drflac_open_memory_with_metadata() + - drflac_open_and_read_pcm_frames_s32() + - drflac_open_and_read_pcm_frames_s16() + - drflac_open_and_read_pcm_frames_f32() + - drflac_open_file_and_read_pcm_frames_s32() + - drflac_open_file_and_read_pcm_frames_s16() + - drflac_open_file_and_read_pcm_frames_f32() + - drflac_open_memory_and_read_pcm_frames_s32() + - drflac_open_memory_and_read_pcm_frames_s16() + - drflac_open_memory_and_read_pcm_frames_f32() + Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use + DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE. + - Remove deprecated APIs: + - drflac_read_s32() + - drflac_read_s16() + - drflac_read_f32() + - drflac_seek_to_sample() + - drflac_open_and_decode_s32() + - drflac_open_and_decode_s16() + - drflac_open_and_decode_f32() + - drflac_open_and_decode_file_s32() + - drflac_open_and_decode_file_s16() + - drflac_open_and_decode_file_f32() + - drflac_open_and_decode_memory_s32() + - drflac_open_and_decode_memory_s16() + - drflac_open_and_decode_memory_f32() + - Remove drflac.totalSampleCount which is now replaced with drflac.totalPCMFrameCount. You can emulate drflac.totalSampleCount + by doing pFlac->totalPCMFrameCount*pFlac->channels. + - Rename drflac.currentFrame to drflac.currentFLACFrame to remove ambiguity with PCM frames. + - Fix errors when seeking to the end of a stream. + - Optimizations to seeking. + - SSE improvements and optimizations. + - ARM NEON optimizations. + - Optimizations to drflac_read_pcm_frames_s16(). + - Optimizations to drflac_read_pcm_frames_s32(). + +v0.11.10 - 2019-06-26 + - Fix a compiler error. + +v0.11.9 - 2019-06-16 + - Silence some ThreadSanitizer warnings. + +v0.11.8 - 2019-05-21 + - Fix warnings. + +v0.11.7 - 2019-05-06 + - C89 fixes. + +v0.11.6 - 2019-05-05 + - Add support for C89. + - Fix a compiler warning when CRC is disabled. + - Change license to choice of public domain or MIT-0. + +v0.11.5 - 2019-04-19 + - Fix a compiler error with GCC. + +v0.11.4 - 2019-04-17 + - Fix some warnings with GCC when compiling with -std=c99. + +v0.11.3 - 2019-04-07 + - Silence warnings with GCC. + +v0.11.2 - 2019-03-10 + - Fix a warning. + +v0.11.1 - 2019-02-17 + - Fix a potential bug with seeking. + +v0.11.0 - 2018-12-16 + - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with + drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take + and return PCM frame counts instead of sample counts. To upgrade you will need to change the input count by + dividing it by the channel count, and then do the same with the return value. + - API_CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as + the changes to drflac_read_*() apply. + - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as + the changes to drflac_read_*() apply. + - Optimizations. + +v0.10.0 - 2018-09-11 + - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you + need to do it yourself via the callback API. + - Fix the clang build. + - Fix undefined behavior. + - Fix errors with CUESHEET metdata blocks. + - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the + Vorbis comment API. + - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams. + - Minor optimizations. + +v0.9.11 - 2018-08-29 + - Fix a bug with sample reconstruction. + +v0.9.10 - 2018-08-07 + - Improve 64-bit detection. + +v0.9.9 - 2018-08-05 + - Fix C++ build on older versions of GCC. + +v0.9.8 - 2018-07-24 + - Fix compilation errors. + +v0.9.7 - 2018-07-05 + - Fix a warning. + +v0.9.6 - 2018-06-29 + - Fix some typos. + +v0.9.5 - 2018-06-23 + - Fix some warnings. + +v0.9.4 - 2018-06-14 + - Optimizations to seeking. + - Clean up. + +v0.9.3 - 2018-05-22 + - Bug fix. + +v0.9.2 - 2018-05-12 + - Fix a compilation error due to a missing break statement. + +v0.9.1 - 2018-04-29 + - Fix compilation error with Clang. + +v0.9 - 2018-04-24 + - Fix Clang build. + - Start using major.minor.revision versioning. + +v0.8g - 2018-04-19 + - Fix build on non-x86/x64 architectures. + +v0.8f - 2018-02-02 + - Stop pretending to support changing rate/channels mid stream. + +v0.8e - 2018-02-01 + - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream. + - Fix a crash the the Rice partition order is invalid. + +v0.8d - 2017-09-22 + - Add support for decoding streams with ID3 tags. ID3 tags are just skipped. + +v0.8c - 2017-09-07 + - Fix warning on non-x86/x64 architectures. + +v0.8b - 2017-08-19 + - Fix build on non-x86/x64 architectures. + +v0.8a - 2017-08-13 + - A small optimization for the Clang build. + +v0.8 - 2017-08-12 + - API CHANGE: Rename dr_* types to drflac_*. + - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation. + - Add support for custom implementations of malloc(), realloc(), etc. + - Add CRC checking to Ogg encapsulated streams. + - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported. + - Bug fixes. + +v0.7 - 2017-07-23 + - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed(). + +v0.6 - 2017-07-22 + - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they + never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame. + +v0.5 - 2017-07-16 + - Fix typos. + - Change drflac_bool* types to unsigned. + - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC. + +v0.4f - 2017-03-10 + - Fix a couple of bugs with the bitstreaming code. + +v0.4e - 2017-02-17 + - Fix some warnings. + +v0.4d - 2016-12-26 + - Add support for 32-bit floating-point PCM decoding. + - Use drflac_int* and drflac_uint* sized types to improve compiler support. + - Minor improvements to documentation. + +v0.4c - 2016-12-26 + - Add support for signed 16-bit integer PCM decoding. + +v0.4b - 2016-10-23 + - A minor change to drflac_bool8 and drflac_bool32 types. + +v0.4a - 2016-10-11 + - Rename drBool32 to drflac_bool32 for styling consistency. + +v0.4 - 2016-09-29 + - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type. + - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32(). + - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to + keep it consistent with drflac_audio. + +v0.3f - 2016-09-21 + - Fix a warning with GCC. + +v0.3e - 2016-09-18 + - Fixed a bug where GCC 4.3+ was not getting properly identified. + - Fixed a few typos. + - Changed date formats to ISO 8601 (YYYY-MM-DD). + +v0.3d - 2016-06-11 + - Minor clean up. + +v0.3c - 2016-05-28 + - Fixed compilation error. + +v0.3b - 2016-05-16 + - Fixed Linux/GCC build. + - Updated documentation. + +v0.3a - 2016-05-15 + - Minor fixes to documentation. + +v0.3 - 2016-05-11 + - Optimizations. Now at about parity with the reference implementation on 32-bit builds. + - Lots of clean up. + +v0.2b - 2016-05-10 + - Bug fixes. + +v0.2a - 2016-05-10 + - Made drflac_open_and_decode() more robust. + - Removed an unused debugging variable + +v0.2 - 2016-05-09 + - Added support for Ogg encapsulation. + - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek + should be relative to the start or the current position. Also changes the seeking rules such that + seeking offsets will never be negative. + - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count. + +v0.1b - 2016-05-07 + - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize. + - Removed a stale comment. + +v0.1a - 2016-05-05 + - Minor formatting changes. + - Fixed a warning on the GCC build. + +v0.1 - 2016-05-03 + - Initial versioned release. +*/ + +/* +This software is available as a choice of the following licenses. Choose +whichever you prefer. + +=============================================================================== +ALTERNATIVE 1 - Public Domain (www.unlicense.org) +=============================================================================== +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. + +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to + +=============================================================================== +ALTERNATIVE 2 - MIT No Attribution +=============================================================================== +Copyright 2020 David Reid + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ diff --git a/cores/saturn/deps/libchdr/include/libchdr/bitstream.h b/cores/saturn/deps/libchdr/include/libchdr/bitstream.h new file mode 100644 index 000000000..d376373b7 --- /dev/null +++ b/cores/saturn/deps/libchdr/include/libchdr/bitstream.h @@ -0,0 +1,43 @@ +/* license:BSD-3-Clause + * copyright-holders:Aaron Giles +*************************************************************************** + + bitstream.h + + Helper classes for reading/writing at the bit level. + +***************************************************************************/ + +#pragma once + +#ifndef __BITSTREAM_H__ +#define __BITSTREAM_H__ + +#include + +/*************************************************************************** + * TYPE DEFINITIONS + *************************************************************************** + */ + +/* helper class for reading from a bit buffer */ +struct bitstream +{ + uint32_t buffer; /* current bit accumulator */ + int bits; /* number of bits in the accumulator */ + const uint8_t * read; /* read pointer */ + uint32_t doffset; /* byte offset within the data */ + uint32_t dlength; /* length of the data */ +}; + +struct bitstream* create_bitstream(const void *src, uint32_t srclength); +int bitstream_overflow(struct bitstream* bitstream); +uint32_t bitstream_read_offset(struct bitstream* bitstream); + +uint32_t bitstream_read(struct bitstream* bitstream, int numbits); +uint32_t bitstream_peek(struct bitstream* bitstream, int numbits); +void bitstream_remove(struct bitstream* bitstream, int numbits); +uint32_t bitstream_flush(struct bitstream* bitstream); + + +#endif diff --git a/cores/saturn/deps/libchdr/include/libchdr/cdrom.h b/cores/saturn/deps/libchdr/include/libchdr/cdrom.h new file mode 100644 index 000000000..816e6a5c4 --- /dev/null +++ b/cores/saturn/deps/libchdr/include/libchdr/cdrom.h @@ -0,0 +1,110 @@ +/* license:BSD-3-Clause + * copyright-holders:Aaron Giles +*************************************************************************** + + cdrom.h + + Generic MAME cd-rom implementation + +***************************************************************************/ + +#pragma once + +#ifndef __CDROM_H__ +#define __CDROM_H__ + +#include +#include + +/*************************************************************************** + CONSTANTS +***************************************************************************/ + +/* tracks are padded to a multiple of this many frames */ +#define CD_TRACK_PADDING (4) +#define CD_MAX_TRACKS (99) /* AFAIK the theoretical limit */ +#define CD_MAX_SECTOR_DATA (2352) +#define CD_MAX_SUBCODE_DATA (96) + +#define CD_FRAME_SIZE (CD_MAX_SECTOR_DATA + CD_MAX_SUBCODE_DATA) +#define CD_FRAMES_PER_HUNK (8) + +#define CD_METADATA_WORDS (1+(CD_MAX_TRACKS * 6)) + +enum +{ + CD_TRACK_MODE1 = 0, /* mode 1 2048 bytes/sector */ + CD_TRACK_MODE1_RAW, /* mode 1 2352 bytes/sector */ + CD_TRACK_MODE2, /* mode 2 2336 bytes/sector */ + CD_TRACK_MODE2_FORM1, /* mode 2 2048 bytes/sector */ + CD_TRACK_MODE2_FORM2, /* mode 2 2324 bytes/sector */ + CD_TRACK_MODE2_FORM_MIX, /* mode 2 2336 bytes/sector */ + CD_TRACK_MODE2_RAW, /* mode 2 2352 bytes / sector */ + CD_TRACK_AUDIO, /* redbook audio track 2352 bytes/sector (588 samples) */ + + CD_TRACK_RAW_DONTCARE /* special flag for cdrom_read_data: just return me whatever is there */ +}; + +enum +{ + CD_SUB_NORMAL = 0, /* "cooked" 96 bytes per sector */ + CD_SUB_RAW, /* raw uninterleaved 96 bytes per sector */ + CD_SUB_NONE /* no subcode data stored */ +}; + +#define CD_FLAG_GDROM 0x00000001 /* disc is a GD-ROM, all tracks should be stored with GD-ROM metadata */ +#define CD_FLAG_GDROMLE 0x00000002 /* legacy GD-ROM, with little-endian CDDA data */ + +/*************************************************************************** + FUNCTION PROTOTYPES +***************************************************************************/ + +#ifdef WANT_RAW_DATA_SECTOR +/* ECC utilities */ +int ecc_verify(const uint8_t *sector); +void ecc_generate(uint8_t *sector); +void ecc_clear(uint8_t *sector); +#endif + + + +/*************************************************************************** + INLINE FUNCTIONS +***************************************************************************/ + +static inline uint32_t msf_to_lba(uint32_t msf) +{ + return ( ((msf&0x00ff0000)>>16) * 60 * 75) + (((msf&0x0000ff00)>>8) * 75) + ((msf&0x000000ff)>>0); +} + +static inline uint32_t lba_to_msf(uint32_t lba) +{ + uint8_t m, s, f; + + m = lba / (60 * 75); + lba -= m * (60 * 75); + s = lba / 75; + f = lba % 75; + + return ((m / 10) << 20) | ((m % 10) << 16) | + ((s / 10) << 12) | ((s % 10) << 8) | + ((f / 10) << 4) | ((f % 10) << 0); +} + +/** + * segacd needs it like this.. investigate + * Angelo also says PCE tracks often start playing at the + * wrong address.. related? + **/ +static inline uint32_t lba_to_msf_alt(int lba) +{ + uint32_t ret = 0; + + ret |= ((lba / (60 * 75))&0xff)<<16; + ret |= (((lba / 75) % 60)&0xff)<<8; + ret |= ((lba % 75)&0xff)<<0; + + return ret; +} + +#endif /* __CDROM_H__ */ diff --git a/cores/saturn/deps/libchdr/include/libchdr/chd.h b/cores/saturn/deps/libchdr/include/libchdr/chd.h new file mode 100644 index 000000000..61b149dcf --- /dev/null +++ b/cores/saturn/deps/libchdr/include/libchdr/chd.h @@ -0,0 +1,425 @@ +/*************************************************************************** + + chd.h + + MAME Compressed Hunks of Data file format + +**************************************************************************** + + Copyright Aaron Giles + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name 'MAME' nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY AARON GILES ''AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL AARON GILES BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ + +#pragma once + +#ifndef __CHD_H__ +#define __CHD_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/*************************************************************************** + + Compressed Hunks of Data header format. All numbers are stored in + Motorola (big-endian) byte ordering. The header is 76 (V1) or 80 (V2) + bytes long. + + V1 header: + + [ 0] char tag[8]; // 'MComprHD' + [ 8] UINT32 length; // length of header (including tag and length fields) + [ 12] UINT32 version; // drive format version + [ 16] UINT32 flags; // flags (see below) + [ 20] UINT32 compression; // compression type + [ 24] UINT32 hunksize; // 512-byte sectors per hunk + [ 28] UINT32 totalhunks; // total # of hunks represented + [ 32] UINT32 cylinders; // number of cylinders on hard disk + [ 36] UINT32 heads; // number of heads on hard disk + [ 40] UINT32 sectors; // number of sectors on hard disk + [ 44] UINT8 md5[16]; // MD5 checksum of raw data + [ 60] UINT8 parentmd5[16]; // MD5 checksum of parent file + [ 76] (V1 header length) + + V2 header: + + [ 0] char tag[8]; // 'MComprHD' + [ 8] UINT32 length; // length of header (including tag and length fields) + [ 12] UINT32 version; // drive format version + [ 16] UINT32 flags; // flags (see below) + [ 20] UINT32 compression; // compression type + [ 24] UINT32 hunksize; // seclen-byte sectors per hunk + [ 28] UINT32 totalhunks; // total # of hunks represented + [ 32] UINT32 cylinders; // number of cylinders on hard disk + [ 36] UINT32 heads; // number of heads on hard disk + [ 40] UINT32 sectors; // number of sectors on hard disk + [ 44] UINT8 md5[16]; // MD5 checksum of raw data + [ 60] UINT8 parentmd5[16]; // MD5 checksum of parent file + [ 76] UINT32 seclen; // number of bytes per sector + [ 80] (V2 header length) + + V3 header: + + [ 0] char tag[8]; // 'MComprHD' + [ 8] UINT32 length; // length of header (including tag and length fields) + [ 12] UINT32 version; // drive format version + [ 16] UINT32 flags; // flags (see below) + [ 20] UINT32 compression; // compression type + [ 24] UINT32 totalhunks; // total # of hunks represented + [ 28] UINT64 logicalbytes; // logical size of the data (in bytes) + [ 36] UINT64 metaoffset; // offset to the first blob of metadata + [ 44] UINT8 md5[16]; // MD5 checksum of raw data + [ 60] UINT8 parentmd5[16]; // MD5 checksum of parent file + [ 76] UINT32 hunkbytes; // number of bytes per hunk + [ 80] UINT8 sha1[20]; // SHA1 checksum of raw data + [100] UINT8 parentsha1[20];// SHA1 checksum of parent file + [120] (V3 header length) + + V4 header: + + [ 0] char tag[8]; // 'MComprHD' + [ 8] UINT32 length; // length of header (including tag and length fields) + [ 12] UINT32 version; // drive format version + [ 16] UINT32 flags; // flags (see below) + [ 20] UINT32 compression; // compression type + [ 24] UINT32 totalhunks; // total # of hunks represented + [ 28] UINT64 logicalbytes; // logical size of the data (in bytes) + [ 36] UINT64 metaoffset; // offset to the first blob of metadata + [ 44] UINT32 hunkbytes; // number of bytes per hunk + [ 48] UINT8 sha1[20]; // combined raw+meta SHA1 + [ 68] UINT8 parentsha1[20];// combined raw+meta SHA1 of parent + [ 88] UINT8 rawsha1[20]; // raw data SHA1 + [108] (V4 header length) + + Flags: + 0x00000001 - set if this drive has a parent + 0x00000002 - set if this drive allows writes + + ========================================================================= + + V5 header: + + [ 0] char tag[8]; // 'MComprHD' + [ 8] uint32_t length; // length of header (including tag and length fields) + [ 12] uint32_t version; // drive format version + [ 16] uint32_t compressors[4];// which custom compressors are used? + [ 32] uint64_t logicalbytes; // logical size of the data (in bytes) + [ 40] uint64_t mapoffset; // offset to the map + [ 48] uint64_t metaoffset; // offset to the first blob of metadata + [ 56] uint32_t hunkbytes; // number of bytes per hunk (512k maximum) + [ 60] uint32_t unitbytes; // number of bytes per unit within each hunk + [ 64] uint8_t rawsha1[20]; // raw data SHA1 + [ 84] uint8_t sha1[20]; // combined raw+meta SHA1 + [104] uint8_t parentsha1[20];// combined raw+meta SHA1 of parent + [124] (V5 header length) + + If parentsha1 != 0, we have a parent (no need for flags) + If compressors[0] == 0, we are uncompressed (including maps) + + V5 uncompressed map format: + + [ 0] uint32_t offset; // starting offset / hunk size + + V5 compressed map format header: + + [ 0] uint32_t length; // length of compressed map + [ 4] UINT48 datastart; // offset of first block + [ 10] uint16_t crc; // crc-16 of the map + [ 12] uint8_t lengthbits; // bits used to encode complength + [ 13] uint8_t hunkbits; // bits used to encode self-refs + [ 14] uint8_t parentunitbits; // bits used to encode parent unit refs + [ 15] uint8_t reserved; // future use + [ 16] (compressed header length) + + Each compressed map entry, once expanded, looks like: + + [ 0] uint8_t compression; // compression type + [ 1] UINT24 complength; // compressed length + [ 4] UINT48 offset; // offset + [ 10] uint16_t crc; // crc-16 of the data + +***************************************************************************/ + + +/*************************************************************************** + CONSTANTS +***************************************************************************/ + +/* header information */ +#define CHD_HEADER_VERSION 5 +#define CHD_V1_HEADER_SIZE 76 +#define CHD_V2_HEADER_SIZE 80 +#define CHD_V3_HEADER_SIZE 120 +#define CHD_V4_HEADER_SIZE 108 +#define CHD_V5_HEADER_SIZE 124 + +#define CHD_MAX_HEADER_SIZE CHD_V5_HEADER_SIZE + +/* checksumming information */ +#define CHD_MD5_BYTES 16 +#define CHD_SHA1_BYTES 20 + +/* CHD global flags */ +#define CHDFLAGS_HAS_PARENT 0x00000001 +#define CHDFLAGS_IS_WRITEABLE 0x00000002 +#define CHDFLAGS_UNDEFINED 0xfffffffc + +#define CHD_MAKE_TAG(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) + +/* compression types */ +#define CHDCOMPRESSION_NONE 0 +#define CHDCOMPRESSION_ZLIB 1 +#define CHDCOMPRESSION_ZLIB_PLUS 2 +#define CHDCOMPRESSION_AV 3 + +#define CHD_CODEC_NONE 0 +#define CHD_CODEC_ZLIB CHD_MAKE_TAG('z','l','i','b') +/* general codecs with CD frontend */ +#define CHD_CODEC_CD_ZLIB CHD_MAKE_TAG('c','d','z','l') +#define CHD_CODEC_CD_LZMA CHD_MAKE_TAG('c','d','l','z') +#define CHD_CODEC_CD_FLAC CHD_MAKE_TAG('c','d','f','l') + +/* A/V codec configuration parameters */ +#define AV_CODEC_COMPRESS_CONFIG 1 +#define AV_CODEC_DECOMPRESS_CONFIG 2 + +/* metadata parameters */ +#define CHDMETATAG_WILDCARD 0 +#define CHD_METAINDEX_APPEND ((UINT32)-1) + +/* metadata flags */ +#define CHD_MDFLAGS_CHECKSUM 0x01 /* indicates data is checksummed */ + +/* standard hard disk metadata */ +#define HARD_DISK_METADATA_TAG CHD_MAKE_TAG('G','D','D','D') +#define HARD_DISK_METADATA_FORMAT "CYLS:%d,HEADS:%d,SECS:%d,BPS:%d" + +/* hard disk identify information */ +#define HARD_DISK_IDENT_METADATA_TAG CHD_MAKE_TAG('I','D','N','T') + +/* hard disk key information */ +#define HARD_DISK_KEY_METADATA_TAG CHD_MAKE_TAG('K','E','Y',' ') + +/* pcmcia CIS information */ +#define PCMCIA_CIS_METADATA_TAG CHD_MAKE_TAG('C','I','S',' ') + +/* standard CD-ROM metadata */ +#define CDROM_OLD_METADATA_TAG CHD_MAKE_TAG('C','H','C','D') +#define CDROM_TRACK_METADATA_TAG CHD_MAKE_TAG('C','H','T','R') +#define CDROM_TRACK_METADATA_FORMAT "TRACK:%d TYPE:%s SUBTYPE:%s FRAMES:%d" +#define CDROM_TRACK_METADATA2_TAG CHD_MAKE_TAG('C','H','T','2') +#define CDROM_TRACK_METADATA2_FORMAT "TRACK:%d TYPE:%s SUBTYPE:%s FRAMES:%d PREGAP:%d PGTYPE:%s PGSUB:%s POSTGAP:%d" +#define GDROM_OLD_METADATA_TAG CHD_MAKE_TAG('C','H','G','T') +#define GDROM_TRACK_METADATA_TAG CHD_MAKE_TAG('C', 'H', 'G', 'D') +#define GDROM_TRACK_METADATA_FORMAT "TRACK:%d TYPE:%s SUBTYPE:%s FRAMES:%d PAD:%d PREGAP:%d PGTYPE:%s PGSUB:%s POSTGAP:%d" + +/* standard A/V metadata */ +#define AV_METADATA_TAG CHD_MAKE_TAG('A','V','A','V') +#define AV_METADATA_FORMAT "FPS:%d.%06d WIDTH:%d HEIGHT:%d INTERLACED:%d CHANNELS:%d SAMPLERATE:%d" + +/* A/V laserdisc frame metadata */ +#define AV_LD_METADATA_TAG CHD_MAKE_TAG('A','V','L','D') + +/* CHD open values */ +#define CHD_OPEN_READ 1 +#define CHD_OPEN_READWRITE 2 + +/* error types */ +enum _chd_error +{ + CHDERR_NONE, + CHDERR_NO_INTERFACE, + CHDERR_OUT_OF_MEMORY, + CHDERR_INVALID_FILE, + CHDERR_INVALID_PARAMETER, + CHDERR_INVALID_DATA, + CHDERR_FILE_NOT_FOUND, + CHDERR_REQUIRES_PARENT, + CHDERR_FILE_NOT_WRITEABLE, + CHDERR_READ_ERROR, + CHDERR_WRITE_ERROR, + CHDERR_CODEC_ERROR, + CHDERR_INVALID_PARENT, + CHDERR_HUNK_OUT_OF_RANGE, + CHDERR_DECOMPRESSION_ERROR, + CHDERR_COMPRESSION_ERROR, + CHDERR_CANT_CREATE_FILE, + CHDERR_CANT_VERIFY, + CHDERR_NOT_SUPPORTED, + CHDERR_METADATA_NOT_FOUND, + CHDERR_INVALID_METADATA_SIZE, + CHDERR_UNSUPPORTED_VERSION, + CHDERR_VERIFY_INCOMPLETE, + CHDERR_INVALID_METADATA, + CHDERR_INVALID_STATE, + CHDERR_OPERATION_PENDING, + CHDERR_NO_ASYNC_OPERATION, + CHDERR_UNSUPPORTED_FORMAT +}; +typedef enum _chd_error chd_error; + + + +/*************************************************************************** + TYPE DEFINITIONS +***************************************************************************/ + +/* opaque types */ +typedef struct _chd_file chd_file; + + +/* extract header structure (NOT the on-disk header structure) */ +typedef struct _chd_header chd_header; +struct _chd_header +{ + UINT32 length; /* length of header data */ + UINT32 version; /* drive format version */ + UINT32 flags; /* flags field */ + UINT32 compression[4]; /* compression type */ + UINT32 hunkbytes; /* number of bytes per hunk */ + UINT32 totalhunks; /* total # of hunks represented */ + UINT64 logicalbytes; /* logical size of the data */ + UINT64 metaoffset; /* offset in file of first metadata */ + UINT64 mapoffset; /* TOOD V5 */ + UINT8 md5[CHD_MD5_BYTES]; /* overall MD5 checksum */ + UINT8 parentmd5[CHD_MD5_BYTES]; /* overall MD5 checksum of parent */ + UINT8 sha1[CHD_SHA1_BYTES]; /* overall SHA1 checksum */ + UINT8 rawsha1[CHD_SHA1_BYTES]; /* SHA1 checksum of raw data */ + UINT8 parentsha1[CHD_SHA1_BYTES]; /* overall SHA1 checksum of parent */ + UINT32 unitbytes; /* TODO V5 */ + UINT64 unitcount; /* TODO V5 */ + UINT32 hunkcount; /* TODO V5 */ + + /* map information */ + UINT32 mapentrybytes; /* length of each entry in a map (V5) */ + UINT8* rawmap; /* raw map data */ + + UINT32 obsolete_cylinders; /* obsolete field -- do not use! */ + UINT32 obsolete_sectors; /* obsolete field -- do not use! */ + UINT32 obsolete_heads; /* obsolete field -- do not use! */ + UINT32 obsolete_hunksize; /* obsolete field -- do not use! */ +}; + + +/* structure for returning information about a verification pass */ +typedef struct _chd_verify_result chd_verify_result; +struct _chd_verify_result +{ + UINT8 md5[CHD_MD5_BYTES]; /* overall MD5 checksum */ + UINT8 sha1[CHD_SHA1_BYTES]; /* overall SHA1 checksum */ + UINT8 rawsha1[CHD_SHA1_BYTES]; /* SHA1 checksum of raw data */ + UINT8 metasha1[CHD_SHA1_BYTES]; /* SHA1 checksum of metadata */ +}; + + + +/*************************************************************************** + FUNCTION PROTOTYPES +***************************************************************************/ + +#ifdef _MSC_VER +#ifdef CHD_DLL +#ifdef CHD_DLL_EXPORTS +#define CHD_EXPORT __declspec(dllexport) +#else +#define CHD_EXPORT __declspec(dllimport) +#endif +#else +#define CHD_EXPORT +#endif +#else +#define CHD_EXPORT __attribute__ ((visibility("default"))) +#endif + +/* ----- CHD file management ----- */ + +/* create a new CHD file fitting the given description */ +/* chd_error chd_create(const char *filename, UINT64 logicalbytes, UINT32 hunkbytes, UINT32 compression, chd_file *parent); */ + +/* same as chd_create(), but accepts an already-opened core_file object */ +/* chd_error chd_create_file(core_file *file, UINT64 logicalbytes, UINT32 hunkbytes, UINT32 compression, chd_file *parent); */ + +/* open an existing CHD file */ +CHD_EXPORT chd_error chd_open_file(core_file *file, int mode, chd_file *parent, chd_file **chd); +CHD_EXPORT chd_error chd_open(const char *filename, int mode, chd_file *parent, chd_file **chd); + +/* precache underlying file */ +CHD_EXPORT chd_error chd_precache(chd_file *chd); + +/* close a CHD file */ +CHD_EXPORT void chd_close(chd_file *chd); + +/* return the associated core_file */ +CHD_EXPORT core_file *chd_core_file(chd_file *chd); + +/* return an error string for the given CHD error */ +CHD_EXPORT const char *chd_error_string(chd_error err); + + + +/* ----- CHD header management ----- */ + +/* return a pointer to the extracted CHD header data */ +CHD_EXPORT const chd_header *chd_get_header(chd_file *chd); + + + + +/* ----- core data read/write ----- */ + +/* read one hunk from the CHD file */ +CHD_EXPORT chd_error chd_read(chd_file *chd, UINT32 hunknum, void *buffer); + + + +/* ----- metadata management ----- */ + +/* get indexed metadata of a particular sort */ +CHD_EXPORT chd_error chd_get_metadata(chd_file *chd, UINT32 searchtag, UINT32 searchindex, void *output, UINT32 outputlen, UINT32 *resultlen, UINT32 *resulttag, UINT8 *resultflags); + + + + +/* ----- codec interfaces ----- */ + +/* set internal codec parameters */ +CHD_EXPORT chd_error chd_codec_config(chd_file *chd, int param, void *config); + +/* return a string description of a codec */ +CHD_EXPORT const char *chd_get_codec_name(UINT32 codec); + +#ifdef __cplusplus +} +#endif + +#endif /* __CHD_H__ */ diff --git a/cores/saturn/deps/libchdr/include/libchdr/chdconfig.h b/cores/saturn/deps/libchdr/include/libchdr/chdconfig.h new file mode 100644 index 000000000..752038b48 --- /dev/null +++ b/cores/saturn/deps/libchdr/include/libchdr/chdconfig.h @@ -0,0 +1,10 @@ +#ifndef __CHDCONFIG_H__ +#define __CHDCONFIG_H__ + +/* Configure CHDR features here */ +#define WANT_RAW_DATA_SECTOR 1 +#define WANT_SUBCODE 1 +#define NEED_CACHE_HUNK 1 +#define VERIFY_BLOCK_CRC 1 + +#endif diff --git a/cores/saturn/deps/libchdr/include/libchdr/coretypes.h b/cores/saturn/deps/libchdr/include/libchdr/coretypes.h new file mode 100644 index 000000000..30f892f6e --- /dev/null +++ b/cores/saturn/deps/libchdr/include/libchdr/coretypes.h @@ -0,0 +1,48 @@ +#ifndef __CORETYPES_H__ +#define __CORETYPES_H__ + +#include +#include + +#ifdef USE_LIBRETRO_VFS +#include +#endif + +#define ARRAY_LENGTH(x) (sizeof(x)/sizeof(x[0])) + +typedef uint64_t UINT64; +typedef uint32_t UINT32; +typedef uint16_t UINT16; +typedef uint8_t UINT8; + +typedef int64_t INT64; +typedef int32_t INT32; +typedef int16_t INT16; +typedef int8_t INT8; + +#define core_file FILE +#define core_fopen(file) fopen(file, "rb") +#if defined(__WIN32__) || defined(_WIN32) || defined(WIN32) || defined(__WIN64__) + #define core_fseek _fseeki64 + #define core_ftell _ftelli64 +#elif defined(_LARGEFILE_SOURCE) && defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 + #define core_fseek fseeko64 + #define core_ftell ftello64 +#else + #define core_fseek fseeko + #define core_ftell ftello +#endif +#define core_fread(fc, buff, len) fread(buff, 1, len, fc) +#define core_fclose fclose + +static UINT64 core_fsize(core_file *f) +{ + UINT64 rv; + UINT64 p = core_ftell(f); + core_fseek(f, 0, SEEK_END); + rv = core_ftell(f); + core_fseek(f, p, SEEK_SET); + return rv; +} + +#endif diff --git a/cores/saturn/deps/libchdr/include/libchdr/flac.h b/cores/saturn/deps/libchdr/include/libchdr/flac.h new file mode 100644 index 000000000..bff255b21 --- /dev/null +++ b/cores/saturn/deps/libchdr/include/libchdr/flac.h @@ -0,0 +1,50 @@ +/* license:BSD-3-Clause + * copyright-holders:Aaron Giles + *************************************************************************** + + flac.h + + FLAC compression wrappers + +***************************************************************************/ + +#pragma once + +#ifndef __FLAC_H__ +#define __FLAC_H__ + +#include + +/*************************************************************************** + * TYPE DEFINITIONS + *************************************************************************** + */ + +typedef struct _flac_decoder flac_decoder; +struct _flac_decoder { + /* output state */ + void * decoder; /* actual encoder */ + uint32_t sample_rate; /* decoded sample rate */ + uint8_t channels; /* decoded number of channels */ + uint8_t bits_per_sample; /* decoded bits per sample */ + uint32_t compressed_offset; /* current offset in compressed data */ + const uint8_t * compressed_start; /* start of compressed data */ + uint32_t compressed_length; /* length of compressed data */ + const uint8_t * compressed2_start; /* start of compressed data */ + uint32_t compressed2_length; /* length of compressed data */ + int16_t * uncompressed_start[8]; /* pointer to start of uncompressed data (up to 8 streams) */ + uint32_t uncompressed_offset; /* current position in uncompressed data */ + uint32_t uncompressed_length; /* length of uncompressed data */ + int uncompressed_swap; /* swap uncompressed sample data */ + uint8_t custom_header[0x2a]; /* custom header */ +}; + +/* ======================> flac_decoder */ + +int flac_decoder_init(flac_decoder* decoder); +void flac_decoder_free(flac_decoder* decoder); +int flac_decoder_reset(flac_decoder* decoder, uint32_t sample_rate, uint8_t num_channels, uint32_t block_size, const void *buffer, uint32_t length); +int flac_decoder_decode_interleaved(flac_decoder* decoder, int16_t *samples, uint32_t num_samples, int swap_endian); +uint32_t flac_decoder_finish(flac_decoder* decoder); + +#endif /* __FLAC_H__ */ diff --git a/cores/saturn/deps/libchdr/include/libchdr/huffman.h b/cores/saturn/deps/libchdr/include/libchdr/huffman.h new file mode 100644 index 000000000..6c9f51136 --- /dev/null +++ b/cores/saturn/deps/libchdr/include/libchdr/huffman.h @@ -0,0 +1,90 @@ +/* license:BSD-3-Clause + * copyright-holders:Aaron Giles + *************************************************************************** + + huffman.h + + Static Huffman compression and decompression helpers. + +***************************************************************************/ + +#pragma once + +#ifndef __HUFFMAN_H__ +#define __HUFFMAN_H__ + +#include + + +/*************************************************************************** + * CONSTANTS + *************************************************************************** + */ + +enum huffman_error +{ + HUFFERR_NONE = 0, + HUFFERR_TOO_MANY_BITS, + HUFFERR_INVALID_DATA, + HUFFERR_INPUT_BUFFER_TOO_SMALL, + HUFFERR_OUTPUT_BUFFER_TOO_SMALL, + HUFFERR_INTERNAL_INCONSISTENCY, + HUFFERR_TOO_MANY_CONTEXTS +}; + +/*************************************************************************** + * TYPE DEFINITIONS + *************************************************************************** + */ + +typedef uint16_t lookup_value; + +/* a node in the huffman tree */ +struct node_t +{ + struct node_t* parent; /* pointer to parent node */ + uint32_t count; /* number of hits on this node */ + uint32_t weight; /* assigned weight of this node */ + uint32_t bits; /* bits used to encode the node */ + uint8_t numbits; /* number of bits needed for this node */ +}; + +/* ======================> huffman_context_base */ + +/* context class for decoding */ +struct huffman_decoder +{ + /* internal state */ + uint32_t numcodes; /* number of total codes being processed */ + uint8_t maxbits; /* maximum bits per code */ + uint8_t prevdata; /* value of the previous data (for delta-RLE encoding) */ + int rleremaining; /* number of RLE bytes remaining (for delta-RLE encoding) */ + lookup_value * lookup; /* pointer to the lookup table */ + struct node_t * huffnode; /* array of nodes */ + uint32_t * datahisto; /* histogram of data values */ + + /* array versions of the info we need */ +#if 0 + node_t* huffnode_array; /* [_NumCodes]; */ + lookup_value* lookup_array; /* [1 << _MaxBits]; */ +#endif +}; + +/* ======================> huffman_decoder */ + +struct huffman_decoder* create_huffman_decoder(int numcodes, int maxbits); +void delete_huffman_decoder(struct huffman_decoder* decoder); + +/* single item operations */ +uint32_t huffman_decode_one(struct huffman_decoder* decoder, struct bitstream* bitbuf); + +enum huffman_error huffman_import_tree_rle(struct huffman_decoder* decoder, struct bitstream* bitbuf); +enum huffman_error huffman_import_tree_huffman(struct huffman_decoder* decoder, struct bitstream* bitbuf); + +int huffman_build_tree(struct huffman_decoder* decoder, uint32_t totaldata, uint32_t totalweight); +enum huffman_error huffman_assign_canonical_codes(struct huffman_decoder* decoder); +enum huffman_error huffman_compute_tree_from_histo(struct huffman_decoder* decoder); + +void huffman_build_lookup_table(struct huffman_decoder* decoder); + +#endif diff --git a/cores/saturn/deps/libchdr/src/libchdr_bitstream.c b/cores/saturn/deps/libchdr/src/libchdr_bitstream.c new file mode 100644 index 000000000..c82a67ddc --- /dev/null +++ b/cores/saturn/deps/libchdr/src/libchdr_bitstream.c @@ -0,0 +1,125 @@ +/* license:BSD-3-Clause + * copyright-holders:Aaron Giles +*************************************************************************** + + bitstream.c + + Helper classes for reading/writing at the bit level. + +***************************************************************************/ + +#include +#include + +/*************************************************************************** + * INLINE FUNCTIONS + *************************************************************************** + */ + +int bitstream_overflow(struct bitstream* bitstream) { return ((bitstream->doffset - bitstream->bits / 8) > bitstream->dlength); } + +/*------------------------------------------------- + * create_bitstream - constructor + *------------------------------------------------- + */ + +struct bitstream* create_bitstream(const void *src, uint32_t srclength) +{ + struct bitstream* bitstream = (struct bitstream*)malloc(sizeof(struct bitstream)); + bitstream->buffer = 0; + bitstream->bits = 0; + bitstream->read = (const uint8_t*)src; + bitstream->doffset = 0; + bitstream->dlength = srclength; + return bitstream; +} + + +/*----------------------------------------------------- + * bitstream_peek - fetch the requested number of bits + * but don't advance the input pointer + *----------------------------------------------------- + */ + +uint32_t bitstream_peek(struct bitstream* bitstream, int numbits) +{ + if (numbits == 0) + return 0; + + /* fetch data if we need more */ + if (numbits > bitstream->bits) + { + while (bitstream->bits <= 24) + { + if (bitstream->doffset < bitstream->dlength) + bitstream->buffer |= bitstream->read[bitstream->doffset] << (24 - bitstream->bits); + bitstream->doffset++; + bitstream->bits += 8; + } + } + + /* return the data */ + return bitstream->buffer >> (32 - numbits); +} + + +/*----------------------------------------------------- + * bitstream_remove - advance the input pointer by the + * specified number of bits + *----------------------------------------------------- + */ + +void bitstream_remove(struct bitstream* bitstream, int numbits) +{ + bitstream->buffer <<= numbits; + bitstream->bits -= numbits; +} + + +/*----------------------------------------------------- + * bitstream_read - fetch the requested number of bits + *----------------------------------------------------- + */ + +uint32_t bitstream_read(struct bitstream* bitstream, int numbits) +{ + uint32_t result = bitstream_peek(bitstream, numbits); + bitstream_remove(bitstream, numbits); + return result; +} + + +/*------------------------------------------------- + * read_offset - return the current read offset + *------------------------------------------------- + */ + +uint32_t bitstream_read_offset(struct bitstream* bitstream) +{ + uint32_t result = bitstream->doffset; + int bits = bitstream->bits; + while (bits >= 8) + { + result--; + bits -= 8; + } + return result; +} + + +/*------------------------------------------------- + * flush - flush to the nearest byte + *------------------------------------------------- + */ + +uint32_t bitstream_flush(struct bitstream* bitstream) +{ + while (bitstream->bits >= 8) + { + bitstream->doffset--; + bitstream->bits -= 8; + } + bitstream->bits = bitstream->buffer = 0; + return bitstream->doffset; +} + diff --git a/cores/saturn/deps/libchdr/src/libchdr_cdrom.c b/cores/saturn/deps/libchdr/src/libchdr_cdrom.c new file mode 100644 index 000000000..58be0155e --- /dev/null +++ b/cores/saturn/deps/libchdr/src/libchdr_cdrom.c @@ -0,0 +1,415 @@ +/* license:BSD-3-Clause + * copyright-holders:Aaron Giles +*************************************************************************** + + cdrom.c + + Generic MAME CD-ROM utilties - build IDE and SCSI CD-ROMs on top of this + +**************************************************************************** + + IMPORTANT: + "physical" block addresses are the actual addresses on the emulated CD. + "chd" block addresses are the block addresses in the CHD file. + Because we pad each track to a 4-frame boundary, these addressing + schemes will differ after track 1! + +***************************************************************************/ +#include +#include + +#include + +#ifdef WANT_RAW_DATA_SECTOR + +/*************************************************************************** + DEBUGGING +***************************************************************************/ + +/** @brief The verbose. */ +#define VERBOSE (0) +#if VERBOSE + +/** + * @def LOG(x) do + * + * @brief A macro that defines log. + * + * @param x The void to process. + */ + +#define LOG(x) do { if (VERBOSE) logerror x; } while (0) + +/** + * @fn void CLIB_DECL logerror(const char *text, ...) ATTR_PRINTF(1,2); + * + * @brief Logerrors the given text. + * + * @param text The text. + * + * @return A CLIB_DECL. + */ + +void CLIB_DECL logerror(const char *text, ...) ATTR_PRINTF(1,2); +#else + +/** + * @def LOG(x); + * + * @brief A macro that defines log. + * + * @param x The void to process. + */ + +#define LOG(x) +#endif + +/*************************************************************************** + CONSTANTS +***************************************************************************/ + +/** @brief offset within sector. */ +#define SYNC_OFFSET 0x000 +/** @brief 12 bytes. */ +#define SYNC_NUM_BYTES 12 + +/** @brief offset within sector. */ +#define MODE_OFFSET 0x00f + +/** @brief offset within sector. */ +#define ECC_P_OFFSET 0x81c +/** @brief 2 lots of 86. */ +#define ECC_P_NUM_BYTES 86 +/** @brief 24 bytes each. */ +#define ECC_P_COMP 24 + +/** @brief The ECC q offset. */ +#define ECC_Q_OFFSET (ECC_P_OFFSET + 2 * ECC_P_NUM_BYTES) +/** @brief 2 lots of 52. */ +#define ECC_Q_NUM_BYTES 52 +/** @brief 43 bytes each. */ +#define ECC_Q_COMP 43 + +/** + * @brief ------------------------------------------------- + * ECC lookup tables pre-calculated tables for ECC data calcs + * -------------------------------------------------. + */ + +static const uint8_t ecclow[256] = +{ + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, + 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, + 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, + 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, + 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, + 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, + 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, + 0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13, 0x0d, 0x0f, 0x09, 0x0b, 0x05, 0x07, 0x01, 0x03, + 0x3d, 0x3f, 0x39, 0x3b, 0x35, 0x37, 0x31, 0x33, 0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27, 0x21, 0x23, + 0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53, 0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43, + 0x7d, 0x7f, 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73, 0x6d, 0x6f, 0x69, 0x6b, 0x65, 0x67, 0x61, 0x63, + 0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97, 0x91, 0x93, 0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83, + 0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3, 0xad, 0xaf, 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3, + 0xdd, 0xdf, 0xd9, 0xdb, 0xd5, 0xd7, 0xd1, 0xd3, 0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7, 0xc1, 0xc3, + 0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3, 0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3 +}; + +/** @brief The ecchigh[ 256]. */ +static const uint8_t ecchigh[256] = +{ + 0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05, + 0xfb, 0x0f, 0x0e, 0xfa, 0x0c, 0xf8, 0xf9, 0x0d, 0x08, 0xfc, 0xfd, 0x09, 0xff, 0x0b, 0x0a, 0xfe, + 0xeb, 0x1f, 0x1e, 0xea, 0x1c, 0xe8, 0xe9, 0x1d, 0x18, 0xec, 0xed, 0x19, 0xef, 0x1b, 0x1a, 0xee, + 0x10, 0xe4, 0xe5, 0x11, 0xe7, 0x13, 0x12, 0xe6, 0xe3, 0x17, 0x16, 0xe2, 0x14, 0xe0, 0xe1, 0x15, + 0xcb, 0x3f, 0x3e, 0xca, 0x3c, 0xc8, 0xc9, 0x3d, 0x38, 0xcc, 0xcd, 0x39, 0xcf, 0x3b, 0x3a, 0xce, + 0x30, 0xc4, 0xc5, 0x31, 0xc7, 0x33, 0x32, 0xc6, 0xc3, 0x37, 0x36, 0xc2, 0x34, 0xc0, 0xc1, 0x35, + 0x20, 0xd4, 0xd5, 0x21, 0xd7, 0x23, 0x22, 0xd6, 0xd3, 0x27, 0x26, 0xd2, 0x24, 0xd0, 0xd1, 0x25, + 0xdb, 0x2f, 0x2e, 0xda, 0x2c, 0xd8, 0xd9, 0x2d, 0x28, 0xdc, 0xdd, 0x29, 0xdf, 0x2b, 0x2a, 0xde, + 0x8b, 0x7f, 0x7e, 0x8a, 0x7c, 0x88, 0x89, 0x7d, 0x78, 0x8c, 0x8d, 0x79, 0x8f, 0x7b, 0x7a, 0x8e, + 0x70, 0x84, 0x85, 0x71, 0x87, 0x73, 0x72, 0x86, 0x83, 0x77, 0x76, 0x82, 0x74, 0x80, 0x81, 0x75, + 0x60, 0x94, 0x95, 0x61, 0x97, 0x63, 0x62, 0x96, 0x93, 0x67, 0x66, 0x92, 0x64, 0x90, 0x91, 0x65, + 0x9b, 0x6f, 0x6e, 0x9a, 0x6c, 0x98, 0x99, 0x6d, 0x68, 0x9c, 0x9d, 0x69, 0x9f, 0x6b, 0x6a, 0x9e, + 0x40, 0xb4, 0xb5, 0x41, 0xb7, 0x43, 0x42, 0xb6, 0xb3, 0x47, 0x46, 0xb2, 0x44, 0xb0, 0xb1, 0x45, + 0xbb, 0x4f, 0x4e, 0xba, 0x4c, 0xb8, 0xb9, 0x4d, 0x48, 0xbc, 0xbd, 0x49, 0xbf, 0x4b, 0x4a, 0xbe, + 0xab, 0x5f, 0x5e, 0xaa, 0x5c, 0xa8, 0xa9, 0x5d, 0x58, 0xac, 0xad, 0x59, 0xaf, 0x5b, 0x5a, 0xae, + 0x50, 0xa4, 0xa5, 0x51, 0xa7, 0x53, 0x52, 0xa6, 0xa3, 0x57, 0x56, 0xa2, 0x54, 0xa0, 0xa1, 0x55 +}; + +/** + * @brief ------------------------------------------------- + * poffsets - each row represents the addresses used to calculate a byte of the ECC P + * data 86 (*2) ECC P bytes, 24 values represented by each + * -------------------------------------------------. + */ + +static const uint16_t poffsets[ECC_P_NUM_BYTES][ECC_P_COMP] = +{ + { 0x000,0x056,0x0ac,0x102,0x158,0x1ae,0x204,0x25a,0x2b0,0x306,0x35c,0x3b2,0x408,0x45e,0x4b4,0x50a,0x560,0x5b6,0x60c,0x662,0x6b8,0x70e,0x764,0x7ba }, + { 0x001,0x057,0x0ad,0x103,0x159,0x1af,0x205,0x25b,0x2b1,0x307,0x35d,0x3b3,0x409,0x45f,0x4b5,0x50b,0x561,0x5b7,0x60d,0x663,0x6b9,0x70f,0x765,0x7bb }, + { 0x002,0x058,0x0ae,0x104,0x15a,0x1b0,0x206,0x25c,0x2b2,0x308,0x35e,0x3b4,0x40a,0x460,0x4b6,0x50c,0x562,0x5b8,0x60e,0x664,0x6ba,0x710,0x766,0x7bc }, + { 0x003,0x059,0x0af,0x105,0x15b,0x1b1,0x207,0x25d,0x2b3,0x309,0x35f,0x3b5,0x40b,0x461,0x4b7,0x50d,0x563,0x5b9,0x60f,0x665,0x6bb,0x711,0x767,0x7bd }, + { 0x004,0x05a,0x0b0,0x106,0x15c,0x1b2,0x208,0x25e,0x2b4,0x30a,0x360,0x3b6,0x40c,0x462,0x4b8,0x50e,0x564,0x5ba,0x610,0x666,0x6bc,0x712,0x768,0x7be }, + { 0x005,0x05b,0x0b1,0x107,0x15d,0x1b3,0x209,0x25f,0x2b5,0x30b,0x361,0x3b7,0x40d,0x463,0x4b9,0x50f,0x565,0x5bb,0x611,0x667,0x6bd,0x713,0x769,0x7bf }, + { 0x006,0x05c,0x0b2,0x108,0x15e,0x1b4,0x20a,0x260,0x2b6,0x30c,0x362,0x3b8,0x40e,0x464,0x4ba,0x510,0x566,0x5bc,0x612,0x668,0x6be,0x714,0x76a,0x7c0 }, + { 0x007,0x05d,0x0b3,0x109,0x15f,0x1b5,0x20b,0x261,0x2b7,0x30d,0x363,0x3b9,0x40f,0x465,0x4bb,0x511,0x567,0x5bd,0x613,0x669,0x6bf,0x715,0x76b,0x7c1 }, + { 0x008,0x05e,0x0b4,0x10a,0x160,0x1b6,0x20c,0x262,0x2b8,0x30e,0x364,0x3ba,0x410,0x466,0x4bc,0x512,0x568,0x5be,0x614,0x66a,0x6c0,0x716,0x76c,0x7c2 }, + { 0x009,0x05f,0x0b5,0x10b,0x161,0x1b7,0x20d,0x263,0x2b9,0x30f,0x365,0x3bb,0x411,0x467,0x4bd,0x513,0x569,0x5bf,0x615,0x66b,0x6c1,0x717,0x76d,0x7c3 }, + { 0x00a,0x060,0x0b6,0x10c,0x162,0x1b8,0x20e,0x264,0x2ba,0x310,0x366,0x3bc,0x412,0x468,0x4be,0x514,0x56a,0x5c0,0x616,0x66c,0x6c2,0x718,0x76e,0x7c4 }, + { 0x00b,0x061,0x0b7,0x10d,0x163,0x1b9,0x20f,0x265,0x2bb,0x311,0x367,0x3bd,0x413,0x469,0x4bf,0x515,0x56b,0x5c1,0x617,0x66d,0x6c3,0x719,0x76f,0x7c5 }, + { 0x00c,0x062,0x0b8,0x10e,0x164,0x1ba,0x210,0x266,0x2bc,0x312,0x368,0x3be,0x414,0x46a,0x4c0,0x516,0x56c,0x5c2,0x618,0x66e,0x6c4,0x71a,0x770,0x7c6 }, + { 0x00d,0x063,0x0b9,0x10f,0x165,0x1bb,0x211,0x267,0x2bd,0x313,0x369,0x3bf,0x415,0x46b,0x4c1,0x517,0x56d,0x5c3,0x619,0x66f,0x6c5,0x71b,0x771,0x7c7 }, + { 0x00e,0x064,0x0ba,0x110,0x166,0x1bc,0x212,0x268,0x2be,0x314,0x36a,0x3c0,0x416,0x46c,0x4c2,0x518,0x56e,0x5c4,0x61a,0x670,0x6c6,0x71c,0x772,0x7c8 }, + { 0x00f,0x065,0x0bb,0x111,0x167,0x1bd,0x213,0x269,0x2bf,0x315,0x36b,0x3c1,0x417,0x46d,0x4c3,0x519,0x56f,0x5c5,0x61b,0x671,0x6c7,0x71d,0x773,0x7c9 }, + { 0x010,0x066,0x0bc,0x112,0x168,0x1be,0x214,0x26a,0x2c0,0x316,0x36c,0x3c2,0x418,0x46e,0x4c4,0x51a,0x570,0x5c6,0x61c,0x672,0x6c8,0x71e,0x774,0x7ca }, + { 0x011,0x067,0x0bd,0x113,0x169,0x1bf,0x215,0x26b,0x2c1,0x317,0x36d,0x3c3,0x419,0x46f,0x4c5,0x51b,0x571,0x5c7,0x61d,0x673,0x6c9,0x71f,0x775,0x7cb }, + { 0x012,0x068,0x0be,0x114,0x16a,0x1c0,0x216,0x26c,0x2c2,0x318,0x36e,0x3c4,0x41a,0x470,0x4c6,0x51c,0x572,0x5c8,0x61e,0x674,0x6ca,0x720,0x776,0x7cc }, + { 0x013,0x069,0x0bf,0x115,0x16b,0x1c1,0x217,0x26d,0x2c3,0x319,0x36f,0x3c5,0x41b,0x471,0x4c7,0x51d,0x573,0x5c9,0x61f,0x675,0x6cb,0x721,0x777,0x7cd }, + { 0x014,0x06a,0x0c0,0x116,0x16c,0x1c2,0x218,0x26e,0x2c4,0x31a,0x370,0x3c6,0x41c,0x472,0x4c8,0x51e,0x574,0x5ca,0x620,0x676,0x6cc,0x722,0x778,0x7ce }, + { 0x015,0x06b,0x0c1,0x117,0x16d,0x1c3,0x219,0x26f,0x2c5,0x31b,0x371,0x3c7,0x41d,0x473,0x4c9,0x51f,0x575,0x5cb,0x621,0x677,0x6cd,0x723,0x779,0x7cf }, + { 0x016,0x06c,0x0c2,0x118,0x16e,0x1c4,0x21a,0x270,0x2c6,0x31c,0x372,0x3c8,0x41e,0x474,0x4ca,0x520,0x576,0x5cc,0x622,0x678,0x6ce,0x724,0x77a,0x7d0 }, + { 0x017,0x06d,0x0c3,0x119,0x16f,0x1c5,0x21b,0x271,0x2c7,0x31d,0x373,0x3c9,0x41f,0x475,0x4cb,0x521,0x577,0x5cd,0x623,0x679,0x6cf,0x725,0x77b,0x7d1 }, + { 0x018,0x06e,0x0c4,0x11a,0x170,0x1c6,0x21c,0x272,0x2c8,0x31e,0x374,0x3ca,0x420,0x476,0x4cc,0x522,0x578,0x5ce,0x624,0x67a,0x6d0,0x726,0x77c,0x7d2 }, + { 0x019,0x06f,0x0c5,0x11b,0x171,0x1c7,0x21d,0x273,0x2c9,0x31f,0x375,0x3cb,0x421,0x477,0x4cd,0x523,0x579,0x5cf,0x625,0x67b,0x6d1,0x727,0x77d,0x7d3 }, + { 0x01a,0x070,0x0c6,0x11c,0x172,0x1c8,0x21e,0x274,0x2ca,0x320,0x376,0x3cc,0x422,0x478,0x4ce,0x524,0x57a,0x5d0,0x626,0x67c,0x6d2,0x728,0x77e,0x7d4 }, + { 0x01b,0x071,0x0c7,0x11d,0x173,0x1c9,0x21f,0x275,0x2cb,0x321,0x377,0x3cd,0x423,0x479,0x4cf,0x525,0x57b,0x5d1,0x627,0x67d,0x6d3,0x729,0x77f,0x7d5 }, + { 0x01c,0x072,0x0c8,0x11e,0x174,0x1ca,0x220,0x276,0x2cc,0x322,0x378,0x3ce,0x424,0x47a,0x4d0,0x526,0x57c,0x5d2,0x628,0x67e,0x6d4,0x72a,0x780,0x7d6 }, + { 0x01d,0x073,0x0c9,0x11f,0x175,0x1cb,0x221,0x277,0x2cd,0x323,0x379,0x3cf,0x425,0x47b,0x4d1,0x527,0x57d,0x5d3,0x629,0x67f,0x6d5,0x72b,0x781,0x7d7 }, + { 0x01e,0x074,0x0ca,0x120,0x176,0x1cc,0x222,0x278,0x2ce,0x324,0x37a,0x3d0,0x426,0x47c,0x4d2,0x528,0x57e,0x5d4,0x62a,0x680,0x6d6,0x72c,0x782,0x7d8 }, + { 0x01f,0x075,0x0cb,0x121,0x177,0x1cd,0x223,0x279,0x2cf,0x325,0x37b,0x3d1,0x427,0x47d,0x4d3,0x529,0x57f,0x5d5,0x62b,0x681,0x6d7,0x72d,0x783,0x7d9 }, + { 0x020,0x076,0x0cc,0x122,0x178,0x1ce,0x224,0x27a,0x2d0,0x326,0x37c,0x3d2,0x428,0x47e,0x4d4,0x52a,0x580,0x5d6,0x62c,0x682,0x6d8,0x72e,0x784,0x7da }, + { 0x021,0x077,0x0cd,0x123,0x179,0x1cf,0x225,0x27b,0x2d1,0x327,0x37d,0x3d3,0x429,0x47f,0x4d5,0x52b,0x581,0x5d7,0x62d,0x683,0x6d9,0x72f,0x785,0x7db }, + { 0x022,0x078,0x0ce,0x124,0x17a,0x1d0,0x226,0x27c,0x2d2,0x328,0x37e,0x3d4,0x42a,0x480,0x4d6,0x52c,0x582,0x5d8,0x62e,0x684,0x6da,0x730,0x786,0x7dc }, + { 0x023,0x079,0x0cf,0x125,0x17b,0x1d1,0x227,0x27d,0x2d3,0x329,0x37f,0x3d5,0x42b,0x481,0x4d7,0x52d,0x583,0x5d9,0x62f,0x685,0x6db,0x731,0x787,0x7dd }, + { 0x024,0x07a,0x0d0,0x126,0x17c,0x1d2,0x228,0x27e,0x2d4,0x32a,0x380,0x3d6,0x42c,0x482,0x4d8,0x52e,0x584,0x5da,0x630,0x686,0x6dc,0x732,0x788,0x7de }, + { 0x025,0x07b,0x0d1,0x127,0x17d,0x1d3,0x229,0x27f,0x2d5,0x32b,0x381,0x3d7,0x42d,0x483,0x4d9,0x52f,0x585,0x5db,0x631,0x687,0x6dd,0x733,0x789,0x7df }, + { 0x026,0x07c,0x0d2,0x128,0x17e,0x1d4,0x22a,0x280,0x2d6,0x32c,0x382,0x3d8,0x42e,0x484,0x4da,0x530,0x586,0x5dc,0x632,0x688,0x6de,0x734,0x78a,0x7e0 }, + { 0x027,0x07d,0x0d3,0x129,0x17f,0x1d5,0x22b,0x281,0x2d7,0x32d,0x383,0x3d9,0x42f,0x485,0x4db,0x531,0x587,0x5dd,0x633,0x689,0x6df,0x735,0x78b,0x7e1 }, + { 0x028,0x07e,0x0d4,0x12a,0x180,0x1d6,0x22c,0x282,0x2d8,0x32e,0x384,0x3da,0x430,0x486,0x4dc,0x532,0x588,0x5de,0x634,0x68a,0x6e0,0x736,0x78c,0x7e2 }, + { 0x029,0x07f,0x0d5,0x12b,0x181,0x1d7,0x22d,0x283,0x2d9,0x32f,0x385,0x3db,0x431,0x487,0x4dd,0x533,0x589,0x5df,0x635,0x68b,0x6e1,0x737,0x78d,0x7e3 }, + { 0x02a,0x080,0x0d6,0x12c,0x182,0x1d8,0x22e,0x284,0x2da,0x330,0x386,0x3dc,0x432,0x488,0x4de,0x534,0x58a,0x5e0,0x636,0x68c,0x6e2,0x738,0x78e,0x7e4 }, + { 0x02b,0x081,0x0d7,0x12d,0x183,0x1d9,0x22f,0x285,0x2db,0x331,0x387,0x3dd,0x433,0x489,0x4df,0x535,0x58b,0x5e1,0x637,0x68d,0x6e3,0x739,0x78f,0x7e5 }, + { 0x02c,0x082,0x0d8,0x12e,0x184,0x1da,0x230,0x286,0x2dc,0x332,0x388,0x3de,0x434,0x48a,0x4e0,0x536,0x58c,0x5e2,0x638,0x68e,0x6e4,0x73a,0x790,0x7e6 }, + { 0x02d,0x083,0x0d9,0x12f,0x185,0x1db,0x231,0x287,0x2dd,0x333,0x389,0x3df,0x435,0x48b,0x4e1,0x537,0x58d,0x5e3,0x639,0x68f,0x6e5,0x73b,0x791,0x7e7 }, + { 0x02e,0x084,0x0da,0x130,0x186,0x1dc,0x232,0x288,0x2de,0x334,0x38a,0x3e0,0x436,0x48c,0x4e2,0x538,0x58e,0x5e4,0x63a,0x690,0x6e6,0x73c,0x792,0x7e8 }, + { 0x02f,0x085,0x0db,0x131,0x187,0x1dd,0x233,0x289,0x2df,0x335,0x38b,0x3e1,0x437,0x48d,0x4e3,0x539,0x58f,0x5e5,0x63b,0x691,0x6e7,0x73d,0x793,0x7e9 }, + { 0x030,0x086,0x0dc,0x132,0x188,0x1de,0x234,0x28a,0x2e0,0x336,0x38c,0x3e2,0x438,0x48e,0x4e4,0x53a,0x590,0x5e6,0x63c,0x692,0x6e8,0x73e,0x794,0x7ea }, + { 0x031,0x087,0x0dd,0x133,0x189,0x1df,0x235,0x28b,0x2e1,0x337,0x38d,0x3e3,0x439,0x48f,0x4e5,0x53b,0x591,0x5e7,0x63d,0x693,0x6e9,0x73f,0x795,0x7eb }, + { 0x032,0x088,0x0de,0x134,0x18a,0x1e0,0x236,0x28c,0x2e2,0x338,0x38e,0x3e4,0x43a,0x490,0x4e6,0x53c,0x592,0x5e8,0x63e,0x694,0x6ea,0x740,0x796,0x7ec }, + { 0x033,0x089,0x0df,0x135,0x18b,0x1e1,0x237,0x28d,0x2e3,0x339,0x38f,0x3e5,0x43b,0x491,0x4e7,0x53d,0x593,0x5e9,0x63f,0x695,0x6eb,0x741,0x797,0x7ed }, + { 0x034,0x08a,0x0e0,0x136,0x18c,0x1e2,0x238,0x28e,0x2e4,0x33a,0x390,0x3e6,0x43c,0x492,0x4e8,0x53e,0x594,0x5ea,0x640,0x696,0x6ec,0x742,0x798,0x7ee }, + { 0x035,0x08b,0x0e1,0x137,0x18d,0x1e3,0x239,0x28f,0x2e5,0x33b,0x391,0x3e7,0x43d,0x493,0x4e9,0x53f,0x595,0x5eb,0x641,0x697,0x6ed,0x743,0x799,0x7ef }, + { 0x036,0x08c,0x0e2,0x138,0x18e,0x1e4,0x23a,0x290,0x2e6,0x33c,0x392,0x3e8,0x43e,0x494,0x4ea,0x540,0x596,0x5ec,0x642,0x698,0x6ee,0x744,0x79a,0x7f0 }, + { 0x037,0x08d,0x0e3,0x139,0x18f,0x1e5,0x23b,0x291,0x2e7,0x33d,0x393,0x3e9,0x43f,0x495,0x4eb,0x541,0x597,0x5ed,0x643,0x699,0x6ef,0x745,0x79b,0x7f1 }, + { 0x038,0x08e,0x0e4,0x13a,0x190,0x1e6,0x23c,0x292,0x2e8,0x33e,0x394,0x3ea,0x440,0x496,0x4ec,0x542,0x598,0x5ee,0x644,0x69a,0x6f0,0x746,0x79c,0x7f2 }, + { 0x039,0x08f,0x0e5,0x13b,0x191,0x1e7,0x23d,0x293,0x2e9,0x33f,0x395,0x3eb,0x441,0x497,0x4ed,0x543,0x599,0x5ef,0x645,0x69b,0x6f1,0x747,0x79d,0x7f3 }, + { 0x03a,0x090,0x0e6,0x13c,0x192,0x1e8,0x23e,0x294,0x2ea,0x340,0x396,0x3ec,0x442,0x498,0x4ee,0x544,0x59a,0x5f0,0x646,0x69c,0x6f2,0x748,0x79e,0x7f4 }, + { 0x03b,0x091,0x0e7,0x13d,0x193,0x1e9,0x23f,0x295,0x2eb,0x341,0x397,0x3ed,0x443,0x499,0x4ef,0x545,0x59b,0x5f1,0x647,0x69d,0x6f3,0x749,0x79f,0x7f5 }, + { 0x03c,0x092,0x0e8,0x13e,0x194,0x1ea,0x240,0x296,0x2ec,0x342,0x398,0x3ee,0x444,0x49a,0x4f0,0x546,0x59c,0x5f2,0x648,0x69e,0x6f4,0x74a,0x7a0,0x7f6 }, + { 0x03d,0x093,0x0e9,0x13f,0x195,0x1eb,0x241,0x297,0x2ed,0x343,0x399,0x3ef,0x445,0x49b,0x4f1,0x547,0x59d,0x5f3,0x649,0x69f,0x6f5,0x74b,0x7a1,0x7f7 }, + { 0x03e,0x094,0x0ea,0x140,0x196,0x1ec,0x242,0x298,0x2ee,0x344,0x39a,0x3f0,0x446,0x49c,0x4f2,0x548,0x59e,0x5f4,0x64a,0x6a0,0x6f6,0x74c,0x7a2,0x7f8 }, + { 0x03f,0x095,0x0eb,0x141,0x197,0x1ed,0x243,0x299,0x2ef,0x345,0x39b,0x3f1,0x447,0x49d,0x4f3,0x549,0x59f,0x5f5,0x64b,0x6a1,0x6f7,0x74d,0x7a3,0x7f9 }, + { 0x040,0x096,0x0ec,0x142,0x198,0x1ee,0x244,0x29a,0x2f0,0x346,0x39c,0x3f2,0x448,0x49e,0x4f4,0x54a,0x5a0,0x5f6,0x64c,0x6a2,0x6f8,0x74e,0x7a4,0x7fa }, + { 0x041,0x097,0x0ed,0x143,0x199,0x1ef,0x245,0x29b,0x2f1,0x347,0x39d,0x3f3,0x449,0x49f,0x4f5,0x54b,0x5a1,0x5f7,0x64d,0x6a3,0x6f9,0x74f,0x7a5,0x7fb }, + { 0x042,0x098,0x0ee,0x144,0x19a,0x1f0,0x246,0x29c,0x2f2,0x348,0x39e,0x3f4,0x44a,0x4a0,0x4f6,0x54c,0x5a2,0x5f8,0x64e,0x6a4,0x6fa,0x750,0x7a6,0x7fc }, + { 0x043,0x099,0x0ef,0x145,0x19b,0x1f1,0x247,0x29d,0x2f3,0x349,0x39f,0x3f5,0x44b,0x4a1,0x4f7,0x54d,0x5a3,0x5f9,0x64f,0x6a5,0x6fb,0x751,0x7a7,0x7fd }, + { 0x044,0x09a,0x0f0,0x146,0x19c,0x1f2,0x248,0x29e,0x2f4,0x34a,0x3a0,0x3f6,0x44c,0x4a2,0x4f8,0x54e,0x5a4,0x5fa,0x650,0x6a6,0x6fc,0x752,0x7a8,0x7fe }, + { 0x045,0x09b,0x0f1,0x147,0x19d,0x1f3,0x249,0x29f,0x2f5,0x34b,0x3a1,0x3f7,0x44d,0x4a3,0x4f9,0x54f,0x5a5,0x5fb,0x651,0x6a7,0x6fd,0x753,0x7a9,0x7ff }, + { 0x046,0x09c,0x0f2,0x148,0x19e,0x1f4,0x24a,0x2a0,0x2f6,0x34c,0x3a2,0x3f8,0x44e,0x4a4,0x4fa,0x550,0x5a6,0x5fc,0x652,0x6a8,0x6fe,0x754,0x7aa,0x800 }, + { 0x047,0x09d,0x0f3,0x149,0x19f,0x1f5,0x24b,0x2a1,0x2f7,0x34d,0x3a3,0x3f9,0x44f,0x4a5,0x4fb,0x551,0x5a7,0x5fd,0x653,0x6a9,0x6ff,0x755,0x7ab,0x801 }, + { 0x048,0x09e,0x0f4,0x14a,0x1a0,0x1f6,0x24c,0x2a2,0x2f8,0x34e,0x3a4,0x3fa,0x450,0x4a6,0x4fc,0x552,0x5a8,0x5fe,0x654,0x6aa,0x700,0x756,0x7ac,0x802 }, + { 0x049,0x09f,0x0f5,0x14b,0x1a1,0x1f7,0x24d,0x2a3,0x2f9,0x34f,0x3a5,0x3fb,0x451,0x4a7,0x4fd,0x553,0x5a9,0x5ff,0x655,0x6ab,0x701,0x757,0x7ad,0x803 }, + { 0x04a,0x0a0,0x0f6,0x14c,0x1a2,0x1f8,0x24e,0x2a4,0x2fa,0x350,0x3a6,0x3fc,0x452,0x4a8,0x4fe,0x554,0x5aa,0x600,0x656,0x6ac,0x702,0x758,0x7ae,0x804 }, + { 0x04b,0x0a1,0x0f7,0x14d,0x1a3,0x1f9,0x24f,0x2a5,0x2fb,0x351,0x3a7,0x3fd,0x453,0x4a9,0x4ff,0x555,0x5ab,0x601,0x657,0x6ad,0x703,0x759,0x7af,0x805 }, + { 0x04c,0x0a2,0x0f8,0x14e,0x1a4,0x1fa,0x250,0x2a6,0x2fc,0x352,0x3a8,0x3fe,0x454,0x4aa,0x500,0x556,0x5ac,0x602,0x658,0x6ae,0x704,0x75a,0x7b0,0x806 }, + { 0x04d,0x0a3,0x0f9,0x14f,0x1a5,0x1fb,0x251,0x2a7,0x2fd,0x353,0x3a9,0x3ff,0x455,0x4ab,0x501,0x557,0x5ad,0x603,0x659,0x6af,0x705,0x75b,0x7b1,0x807 }, + { 0x04e,0x0a4,0x0fa,0x150,0x1a6,0x1fc,0x252,0x2a8,0x2fe,0x354,0x3aa,0x400,0x456,0x4ac,0x502,0x558,0x5ae,0x604,0x65a,0x6b0,0x706,0x75c,0x7b2,0x808 }, + { 0x04f,0x0a5,0x0fb,0x151,0x1a7,0x1fd,0x253,0x2a9,0x2ff,0x355,0x3ab,0x401,0x457,0x4ad,0x503,0x559,0x5af,0x605,0x65b,0x6b1,0x707,0x75d,0x7b3,0x809 }, + { 0x050,0x0a6,0x0fc,0x152,0x1a8,0x1fe,0x254,0x2aa,0x300,0x356,0x3ac,0x402,0x458,0x4ae,0x504,0x55a,0x5b0,0x606,0x65c,0x6b2,0x708,0x75e,0x7b4,0x80a }, + { 0x051,0x0a7,0x0fd,0x153,0x1a9,0x1ff,0x255,0x2ab,0x301,0x357,0x3ad,0x403,0x459,0x4af,0x505,0x55b,0x5b1,0x607,0x65d,0x6b3,0x709,0x75f,0x7b5,0x80b }, + { 0x052,0x0a8,0x0fe,0x154,0x1aa,0x200,0x256,0x2ac,0x302,0x358,0x3ae,0x404,0x45a,0x4b0,0x506,0x55c,0x5b2,0x608,0x65e,0x6b4,0x70a,0x760,0x7b6,0x80c }, + { 0x053,0x0a9,0x0ff,0x155,0x1ab,0x201,0x257,0x2ad,0x303,0x359,0x3af,0x405,0x45b,0x4b1,0x507,0x55d,0x5b3,0x609,0x65f,0x6b5,0x70b,0x761,0x7b7,0x80d }, + { 0x054,0x0aa,0x100,0x156,0x1ac,0x202,0x258,0x2ae,0x304,0x35a,0x3b0,0x406,0x45c,0x4b2,0x508,0x55e,0x5b4,0x60a,0x660,0x6b6,0x70c,0x762,0x7b8,0x80e }, + { 0x055,0x0ab,0x101,0x157,0x1ad,0x203,0x259,0x2af,0x305,0x35b,0x3b1,0x407,0x45d,0x4b3,0x509,0x55f,0x5b5,0x60b,0x661,0x6b7,0x70d,0x763,0x7b9,0x80f } +}; + +/** + * @brief ------------------------------------------------- + * qoffsets - each row represents the addresses used to calculate a byte of the ECC Q + * data 52 (*2) ECC Q bytes, 43 values represented by each + * -------------------------------------------------. + */ + +static const uint16_t qoffsets[ECC_Q_NUM_BYTES][ECC_Q_COMP] = +{ + { 0x000,0x058,0x0b0,0x108,0x160,0x1b8,0x210,0x268,0x2c0,0x318,0x370,0x3c8,0x420,0x478,0x4d0,0x528,0x580,0x5d8,0x630,0x688,0x6e0,0x738,0x790,0x7e8,0x840,0x898,0x034,0x08c,0x0e4,0x13c,0x194,0x1ec,0x244,0x29c,0x2f4,0x34c,0x3a4,0x3fc,0x454,0x4ac,0x504,0x55c,0x5b4 }, + { 0x001,0x059,0x0b1,0x109,0x161,0x1b9,0x211,0x269,0x2c1,0x319,0x371,0x3c9,0x421,0x479,0x4d1,0x529,0x581,0x5d9,0x631,0x689,0x6e1,0x739,0x791,0x7e9,0x841,0x899,0x035,0x08d,0x0e5,0x13d,0x195,0x1ed,0x245,0x29d,0x2f5,0x34d,0x3a5,0x3fd,0x455,0x4ad,0x505,0x55d,0x5b5 }, + { 0x056,0x0ae,0x106,0x15e,0x1b6,0x20e,0x266,0x2be,0x316,0x36e,0x3c6,0x41e,0x476,0x4ce,0x526,0x57e,0x5d6,0x62e,0x686,0x6de,0x736,0x78e,0x7e6,0x83e,0x896,0x032,0x08a,0x0e2,0x13a,0x192,0x1ea,0x242,0x29a,0x2f2,0x34a,0x3a2,0x3fa,0x452,0x4aa,0x502,0x55a,0x5b2,0x60a }, + { 0x057,0x0af,0x107,0x15f,0x1b7,0x20f,0x267,0x2bf,0x317,0x36f,0x3c7,0x41f,0x477,0x4cf,0x527,0x57f,0x5d7,0x62f,0x687,0x6df,0x737,0x78f,0x7e7,0x83f,0x897,0x033,0x08b,0x0e3,0x13b,0x193,0x1eb,0x243,0x29b,0x2f3,0x34b,0x3a3,0x3fb,0x453,0x4ab,0x503,0x55b,0x5b3,0x60b }, + { 0x0ac,0x104,0x15c,0x1b4,0x20c,0x264,0x2bc,0x314,0x36c,0x3c4,0x41c,0x474,0x4cc,0x524,0x57c,0x5d4,0x62c,0x684,0x6dc,0x734,0x78c,0x7e4,0x83c,0x894,0x030,0x088,0x0e0,0x138,0x190,0x1e8,0x240,0x298,0x2f0,0x348,0x3a0,0x3f8,0x450,0x4a8,0x500,0x558,0x5b0,0x608,0x660 }, + { 0x0ad,0x105,0x15d,0x1b5,0x20d,0x265,0x2bd,0x315,0x36d,0x3c5,0x41d,0x475,0x4cd,0x525,0x57d,0x5d5,0x62d,0x685,0x6dd,0x735,0x78d,0x7e5,0x83d,0x895,0x031,0x089,0x0e1,0x139,0x191,0x1e9,0x241,0x299,0x2f1,0x349,0x3a1,0x3f9,0x451,0x4a9,0x501,0x559,0x5b1,0x609,0x661 }, + { 0x102,0x15a,0x1b2,0x20a,0x262,0x2ba,0x312,0x36a,0x3c2,0x41a,0x472,0x4ca,0x522,0x57a,0x5d2,0x62a,0x682,0x6da,0x732,0x78a,0x7e2,0x83a,0x892,0x02e,0x086,0x0de,0x136,0x18e,0x1e6,0x23e,0x296,0x2ee,0x346,0x39e,0x3f6,0x44e,0x4a6,0x4fe,0x556,0x5ae,0x606,0x65e,0x6b6 }, + { 0x103,0x15b,0x1b3,0x20b,0x263,0x2bb,0x313,0x36b,0x3c3,0x41b,0x473,0x4cb,0x523,0x57b,0x5d3,0x62b,0x683,0x6db,0x733,0x78b,0x7e3,0x83b,0x893,0x02f,0x087,0x0df,0x137,0x18f,0x1e7,0x23f,0x297,0x2ef,0x347,0x39f,0x3f7,0x44f,0x4a7,0x4ff,0x557,0x5af,0x607,0x65f,0x6b7 }, + { 0x158,0x1b0,0x208,0x260,0x2b8,0x310,0x368,0x3c0,0x418,0x470,0x4c8,0x520,0x578,0x5d0,0x628,0x680,0x6d8,0x730,0x788,0x7e0,0x838,0x890,0x02c,0x084,0x0dc,0x134,0x18c,0x1e4,0x23c,0x294,0x2ec,0x344,0x39c,0x3f4,0x44c,0x4a4,0x4fc,0x554,0x5ac,0x604,0x65c,0x6b4,0x70c }, + { 0x159,0x1b1,0x209,0x261,0x2b9,0x311,0x369,0x3c1,0x419,0x471,0x4c9,0x521,0x579,0x5d1,0x629,0x681,0x6d9,0x731,0x789,0x7e1,0x839,0x891,0x02d,0x085,0x0dd,0x135,0x18d,0x1e5,0x23d,0x295,0x2ed,0x345,0x39d,0x3f5,0x44d,0x4a5,0x4fd,0x555,0x5ad,0x605,0x65d,0x6b5,0x70d }, + { 0x1ae,0x206,0x25e,0x2b6,0x30e,0x366,0x3be,0x416,0x46e,0x4c6,0x51e,0x576,0x5ce,0x626,0x67e,0x6d6,0x72e,0x786,0x7de,0x836,0x88e,0x02a,0x082,0x0da,0x132,0x18a,0x1e2,0x23a,0x292,0x2ea,0x342,0x39a,0x3f2,0x44a,0x4a2,0x4fa,0x552,0x5aa,0x602,0x65a,0x6b2,0x70a,0x762 }, + { 0x1af,0x207,0x25f,0x2b7,0x30f,0x367,0x3bf,0x417,0x46f,0x4c7,0x51f,0x577,0x5cf,0x627,0x67f,0x6d7,0x72f,0x787,0x7df,0x837,0x88f,0x02b,0x083,0x0db,0x133,0x18b,0x1e3,0x23b,0x293,0x2eb,0x343,0x39b,0x3f3,0x44b,0x4a3,0x4fb,0x553,0x5ab,0x603,0x65b,0x6b3,0x70b,0x763 }, + { 0x204,0x25c,0x2b4,0x30c,0x364,0x3bc,0x414,0x46c,0x4c4,0x51c,0x574,0x5cc,0x624,0x67c,0x6d4,0x72c,0x784,0x7dc,0x834,0x88c,0x028,0x080,0x0d8,0x130,0x188,0x1e0,0x238,0x290,0x2e8,0x340,0x398,0x3f0,0x448,0x4a0,0x4f8,0x550,0x5a8,0x600,0x658,0x6b0,0x708,0x760,0x7b8 }, + { 0x205,0x25d,0x2b5,0x30d,0x365,0x3bd,0x415,0x46d,0x4c5,0x51d,0x575,0x5cd,0x625,0x67d,0x6d5,0x72d,0x785,0x7dd,0x835,0x88d,0x029,0x081,0x0d9,0x131,0x189,0x1e1,0x239,0x291,0x2e9,0x341,0x399,0x3f1,0x449,0x4a1,0x4f9,0x551,0x5a9,0x601,0x659,0x6b1,0x709,0x761,0x7b9 }, + { 0x25a,0x2b2,0x30a,0x362,0x3ba,0x412,0x46a,0x4c2,0x51a,0x572,0x5ca,0x622,0x67a,0x6d2,0x72a,0x782,0x7da,0x832,0x88a,0x026,0x07e,0x0d6,0x12e,0x186,0x1de,0x236,0x28e,0x2e6,0x33e,0x396,0x3ee,0x446,0x49e,0x4f6,0x54e,0x5a6,0x5fe,0x656,0x6ae,0x706,0x75e,0x7b6,0x80e }, + { 0x25b,0x2b3,0x30b,0x363,0x3bb,0x413,0x46b,0x4c3,0x51b,0x573,0x5cb,0x623,0x67b,0x6d3,0x72b,0x783,0x7db,0x833,0x88b,0x027,0x07f,0x0d7,0x12f,0x187,0x1df,0x237,0x28f,0x2e7,0x33f,0x397,0x3ef,0x447,0x49f,0x4f7,0x54f,0x5a7,0x5ff,0x657,0x6af,0x707,0x75f,0x7b7,0x80f }, + { 0x2b0,0x308,0x360,0x3b8,0x410,0x468,0x4c0,0x518,0x570,0x5c8,0x620,0x678,0x6d0,0x728,0x780,0x7d8,0x830,0x888,0x024,0x07c,0x0d4,0x12c,0x184,0x1dc,0x234,0x28c,0x2e4,0x33c,0x394,0x3ec,0x444,0x49c,0x4f4,0x54c,0x5a4,0x5fc,0x654,0x6ac,0x704,0x75c,0x7b4,0x80c,0x864 }, + { 0x2b1,0x309,0x361,0x3b9,0x411,0x469,0x4c1,0x519,0x571,0x5c9,0x621,0x679,0x6d1,0x729,0x781,0x7d9,0x831,0x889,0x025,0x07d,0x0d5,0x12d,0x185,0x1dd,0x235,0x28d,0x2e5,0x33d,0x395,0x3ed,0x445,0x49d,0x4f5,0x54d,0x5a5,0x5fd,0x655,0x6ad,0x705,0x75d,0x7b5,0x80d,0x865 }, + { 0x306,0x35e,0x3b6,0x40e,0x466,0x4be,0x516,0x56e,0x5c6,0x61e,0x676,0x6ce,0x726,0x77e,0x7d6,0x82e,0x886,0x022,0x07a,0x0d2,0x12a,0x182,0x1da,0x232,0x28a,0x2e2,0x33a,0x392,0x3ea,0x442,0x49a,0x4f2,0x54a,0x5a2,0x5fa,0x652,0x6aa,0x702,0x75a,0x7b2,0x80a,0x862,0x8ba }, + { 0x307,0x35f,0x3b7,0x40f,0x467,0x4bf,0x517,0x56f,0x5c7,0x61f,0x677,0x6cf,0x727,0x77f,0x7d7,0x82f,0x887,0x023,0x07b,0x0d3,0x12b,0x183,0x1db,0x233,0x28b,0x2e3,0x33b,0x393,0x3eb,0x443,0x49b,0x4f3,0x54b,0x5a3,0x5fb,0x653,0x6ab,0x703,0x75b,0x7b3,0x80b,0x863,0x8bb }, + { 0x35c,0x3b4,0x40c,0x464,0x4bc,0x514,0x56c,0x5c4,0x61c,0x674,0x6cc,0x724,0x77c,0x7d4,0x82c,0x884,0x020,0x078,0x0d0,0x128,0x180,0x1d8,0x230,0x288,0x2e0,0x338,0x390,0x3e8,0x440,0x498,0x4f0,0x548,0x5a0,0x5f8,0x650,0x6a8,0x700,0x758,0x7b0,0x808,0x860,0x8b8,0x054 }, + { 0x35d,0x3b5,0x40d,0x465,0x4bd,0x515,0x56d,0x5c5,0x61d,0x675,0x6cd,0x725,0x77d,0x7d5,0x82d,0x885,0x021,0x079,0x0d1,0x129,0x181,0x1d9,0x231,0x289,0x2e1,0x339,0x391,0x3e9,0x441,0x499,0x4f1,0x549,0x5a1,0x5f9,0x651,0x6a9,0x701,0x759,0x7b1,0x809,0x861,0x8b9,0x055 }, + { 0x3b2,0x40a,0x462,0x4ba,0x512,0x56a,0x5c2,0x61a,0x672,0x6ca,0x722,0x77a,0x7d2,0x82a,0x882,0x01e,0x076,0x0ce,0x126,0x17e,0x1d6,0x22e,0x286,0x2de,0x336,0x38e,0x3e6,0x43e,0x496,0x4ee,0x546,0x59e,0x5f6,0x64e,0x6a6,0x6fe,0x756,0x7ae,0x806,0x85e,0x8b6,0x052,0x0aa }, + { 0x3b3,0x40b,0x463,0x4bb,0x513,0x56b,0x5c3,0x61b,0x673,0x6cb,0x723,0x77b,0x7d3,0x82b,0x883,0x01f,0x077,0x0cf,0x127,0x17f,0x1d7,0x22f,0x287,0x2df,0x337,0x38f,0x3e7,0x43f,0x497,0x4ef,0x547,0x59f,0x5f7,0x64f,0x6a7,0x6ff,0x757,0x7af,0x807,0x85f,0x8b7,0x053,0x0ab }, + { 0x408,0x460,0x4b8,0x510,0x568,0x5c0,0x618,0x670,0x6c8,0x720,0x778,0x7d0,0x828,0x880,0x01c,0x074,0x0cc,0x124,0x17c,0x1d4,0x22c,0x284,0x2dc,0x334,0x38c,0x3e4,0x43c,0x494,0x4ec,0x544,0x59c,0x5f4,0x64c,0x6a4,0x6fc,0x754,0x7ac,0x804,0x85c,0x8b4,0x050,0x0a8,0x100 }, + { 0x409,0x461,0x4b9,0x511,0x569,0x5c1,0x619,0x671,0x6c9,0x721,0x779,0x7d1,0x829,0x881,0x01d,0x075,0x0cd,0x125,0x17d,0x1d5,0x22d,0x285,0x2dd,0x335,0x38d,0x3e5,0x43d,0x495,0x4ed,0x545,0x59d,0x5f5,0x64d,0x6a5,0x6fd,0x755,0x7ad,0x805,0x85d,0x8b5,0x051,0x0a9,0x101 }, + { 0x45e,0x4b6,0x50e,0x566,0x5be,0x616,0x66e,0x6c6,0x71e,0x776,0x7ce,0x826,0x87e,0x01a,0x072,0x0ca,0x122,0x17a,0x1d2,0x22a,0x282,0x2da,0x332,0x38a,0x3e2,0x43a,0x492,0x4ea,0x542,0x59a,0x5f2,0x64a,0x6a2,0x6fa,0x752,0x7aa,0x802,0x85a,0x8b2,0x04e,0x0a6,0x0fe,0x156 }, + { 0x45f,0x4b7,0x50f,0x567,0x5bf,0x617,0x66f,0x6c7,0x71f,0x777,0x7cf,0x827,0x87f,0x01b,0x073,0x0cb,0x123,0x17b,0x1d3,0x22b,0x283,0x2db,0x333,0x38b,0x3e3,0x43b,0x493,0x4eb,0x543,0x59b,0x5f3,0x64b,0x6a3,0x6fb,0x753,0x7ab,0x803,0x85b,0x8b3,0x04f,0x0a7,0x0ff,0x157 }, + { 0x4b4,0x50c,0x564,0x5bc,0x614,0x66c,0x6c4,0x71c,0x774,0x7cc,0x824,0x87c,0x018,0x070,0x0c8,0x120,0x178,0x1d0,0x228,0x280,0x2d8,0x330,0x388,0x3e0,0x438,0x490,0x4e8,0x540,0x598,0x5f0,0x648,0x6a0,0x6f8,0x750,0x7a8,0x800,0x858,0x8b0,0x04c,0x0a4,0x0fc,0x154,0x1ac }, + { 0x4b5,0x50d,0x565,0x5bd,0x615,0x66d,0x6c5,0x71d,0x775,0x7cd,0x825,0x87d,0x019,0x071,0x0c9,0x121,0x179,0x1d1,0x229,0x281,0x2d9,0x331,0x389,0x3e1,0x439,0x491,0x4e9,0x541,0x599,0x5f1,0x649,0x6a1,0x6f9,0x751,0x7a9,0x801,0x859,0x8b1,0x04d,0x0a5,0x0fd,0x155,0x1ad }, + { 0x50a,0x562,0x5ba,0x612,0x66a,0x6c2,0x71a,0x772,0x7ca,0x822,0x87a,0x016,0x06e,0x0c6,0x11e,0x176,0x1ce,0x226,0x27e,0x2d6,0x32e,0x386,0x3de,0x436,0x48e,0x4e6,0x53e,0x596,0x5ee,0x646,0x69e,0x6f6,0x74e,0x7a6,0x7fe,0x856,0x8ae,0x04a,0x0a2,0x0fa,0x152,0x1aa,0x202 }, + { 0x50b,0x563,0x5bb,0x613,0x66b,0x6c3,0x71b,0x773,0x7cb,0x823,0x87b,0x017,0x06f,0x0c7,0x11f,0x177,0x1cf,0x227,0x27f,0x2d7,0x32f,0x387,0x3df,0x437,0x48f,0x4e7,0x53f,0x597,0x5ef,0x647,0x69f,0x6f7,0x74f,0x7a7,0x7ff,0x857,0x8af,0x04b,0x0a3,0x0fb,0x153,0x1ab,0x203 }, + { 0x560,0x5b8,0x610,0x668,0x6c0,0x718,0x770,0x7c8,0x820,0x878,0x014,0x06c,0x0c4,0x11c,0x174,0x1cc,0x224,0x27c,0x2d4,0x32c,0x384,0x3dc,0x434,0x48c,0x4e4,0x53c,0x594,0x5ec,0x644,0x69c,0x6f4,0x74c,0x7a4,0x7fc,0x854,0x8ac,0x048,0x0a0,0x0f8,0x150,0x1a8,0x200,0x258 }, + { 0x561,0x5b9,0x611,0x669,0x6c1,0x719,0x771,0x7c9,0x821,0x879,0x015,0x06d,0x0c5,0x11d,0x175,0x1cd,0x225,0x27d,0x2d5,0x32d,0x385,0x3dd,0x435,0x48d,0x4e5,0x53d,0x595,0x5ed,0x645,0x69d,0x6f5,0x74d,0x7a5,0x7fd,0x855,0x8ad,0x049,0x0a1,0x0f9,0x151,0x1a9,0x201,0x259 }, + { 0x5b6,0x60e,0x666,0x6be,0x716,0x76e,0x7c6,0x81e,0x876,0x012,0x06a,0x0c2,0x11a,0x172,0x1ca,0x222,0x27a,0x2d2,0x32a,0x382,0x3da,0x432,0x48a,0x4e2,0x53a,0x592,0x5ea,0x642,0x69a,0x6f2,0x74a,0x7a2,0x7fa,0x852,0x8aa,0x046,0x09e,0x0f6,0x14e,0x1a6,0x1fe,0x256,0x2ae }, + { 0x5b7,0x60f,0x667,0x6bf,0x717,0x76f,0x7c7,0x81f,0x877,0x013,0x06b,0x0c3,0x11b,0x173,0x1cb,0x223,0x27b,0x2d3,0x32b,0x383,0x3db,0x433,0x48b,0x4e3,0x53b,0x593,0x5eb,0x643,0x69b,0x6f3,0x74b,0x7a3,0x7fb,0x853,0x8ab,0x047,0x09f,0x0f7,0x14f,0x1a7,0x1ff,0x257,0x2af }, + { 0x60c,0x664,0x6bc,0x714,0x76c,0x7c4,0x81c,0x874,0x010,0x068,0x0c0,0x118,0x170,0x1c8,0x220,0x278,0x2d0,0x328,0x380,0x3d8,0x430,0x488,0x4e0,0x538,0x590,0x5e8,0x640,0x698,0x6f0,0x748,0x7a0,0x7f8,0x850,0x8a8,0x044,0x09c,0x0f4,0x14c,0x1a4,0x1fc,0x254,0x2ac,0x304 }, + { 0x60d,0x665,0x6bd,0x715,0x76d,0x7c5,0x81d,0x875,0x011,0x069,0x0c1,0x119,0x171,0x1c9,0x221,0x279,0x2d1,0x329,0x381,0x3d9,0x431,0x489,0x4e1,0x539,0x591,0x5e9,0x641,0x699,0x6f1,0x749,0x7a1,0x7f9,0x851,0x8a9,0x045,0x09d,0x0f5,0x14d,0x1a5,0x1fd,0x255,0x2ad,0x305 }, + { 0x662,0x6ba,0x712,0x76a,0x7c2,0x81a,0x872,0x00e,0x066,0x0be,0x116,0x16e,0x1c6,0x21e,0x276,0x2ce,0x326,0x37e,0x3d6,0x42e,0x486,0x4de,0x536,0x58e,0x5e6,0x63e,0x696,0x6ee,0x746,0x79e,0x7f6,0x84e,0x8a6,0x042,0x09a,0x0f2,0x14a,0x1a2,0x1fa,0x252,0x2aa,0x302,0x35a }, + { 0x663,0x6bb,0x713,0x76b,0x7c3,0x81b,0x873,0x00f,0x067,0x0bf,0x117,0x16f,0x1c7,0x21f,0x277,0x2cf,0x327,0x37f,0x3d7,0x42f,0x487,0x4df,0x537,0x58f,0x5e7,0x63f,0x697,0x6ef,0x747,0x79f,0x7f7,0x84f,0x8a7,0x043,0x09b,0x0f3,0x14b,0x1a3,0x1fb,0x253,0x2ab,0x303,0x35b }, + { 0x6b8,0x710,0x768,0x7c0,0x818,0x870,0x00c,0x064,0x0bc,0x114,0x16c,0x1c4,0x21c,0x274,0x2cc,0x324,0x37c,0x3d4,0x42c,0x484,0x4dc,0x534,0x58c,0x5e4,0x63c,0x694,0x6ec,0x744,0x79c,0x7f4,0x84c,0x8a4,0x040,0x098,0x0f0,0x148,0x1a0,0x1f8,0x250,0x2a8,0x300,0x358,0x3b0 }, + { 0x6b9,0x711,0x769,0x7c1,0x819,0x871,0x00d,0x065,0x0bd,0x115,0x16d,0x1c5,0x21d,0x275,0x2cd,0x325,0x37d,0x3d5,0x42d,0x485,0x4dd,0x535,0x58d,0x5e5,0x63d,0x695,0x6ed,0x745,0x79d,0x7f5,0x84d,0x8a5,0x041,0x099,0x0f1,0x149,0x1a1,0x1f9,0x251,0x2a9,0x301,0x359,0x3b1 }, + { 0x70e,0x766,0x7be,0x816,0x86e,0x00a,0x062,0x0ba,0x112,0x16a,0x1c2,0x21a,0x272,0x2ca,0x322,0x37a,0x3d2,0x42a,0x482,0x4da,0x532,0x58a,0x5e2,0x63a,0x692,0x6ea,0x742,0x79a,0x7f2,0x84a,0x8a2,0x03e,0x096,0x0ee,0x146,0x19e,0x1f6,0x24e,0x2a6,0x2fe,0x356,0x3ae,0x406 }, + { 0x70f,0x767,0x7bf,0x817,0x86f,0x00b,0x063,0x0bb,0x113,0x16b,0x1c3,0x21b,0x273,0x2cb,0x323,0x37b,0x3d3,0x42b,0x483,0x4db,0x533,0x58b,0x5e3,0x63b,0x693,0x6eb,0x743,0x79b,0x7f3,0x84b,0x8a3,0x03f,0x097,0x0ef,0x147,0x19f,0x1f7,0x24f,0x2a7,0x2ff,0x357,0x3af,0x407 }, + { 0x764,0x7bc,0x814,0x86c,0x008,0x060,0x0b8,0x110,0x168,0x1c0,0x218,0x270,0x2c8,0x320,0x378,0x3d0,0x428,0x480,0x4d8,0x530,0x588,0x5e0,0x638,0x690,0x6e8,0x740,0x798,0x7f0,0x848,0x8a0,0x03c,0x094,0x0ec,0x144,0x19c,0x1f4,0x24c,0x2a4,0x2fc,0x354,0x3ac,0x404,0x45c }, + { 0x765,0x7bd,0x815,0x86d,0x009,0x061,0x0b9,0x111,0x169,0x1c1,0x219,0x271,0x2c9,0x321,0x379,0x3d1,0x429,0x481,0x4d9,0x531,0x589,0x5e1,0x639,0x691,0x6e9,0x741,0x799,0x7f1,0x849,0x8a1,0x03d,0x095,0x0ed,0x145,0x19d,0x1f5,0x24d,0x2a5,0x2fd,0x355,0x3ad,0x405,0x45d }, + { 0x7ba,0x812,0x86a,0x006,0x05e,0x0b6,0x10e,0x166,0x1be,0x216,0x26e,0x2c6,0x31e,0x376,0x3ce,0x426,0x47e,0x4d6,0x52e,0x586,0x5de,0x636,0x68e,0x6e6,0x73e,0x796,0x7ee,0x846,0x89e,0x03a,0x092,0x0ea,0x142,0x19a,0x1f2,0x24a,0x2a2,0x2fa,0x352,0x3aa,0x402,0x45a,0x4b2 }, + { 0x7bb,0x813,0x86b,0x007,0x05f,0x0b7,0x10f,0x167,0x1bf,0x217,0x26f,0x2c7,0x31f,0x377,0x3cf,0x427,0x47f,0x4d7,0x52f,0x587,0x5df,0x637,0x68f,0x6e7,0x73f,0x797,0x7ef,0x847,0x89f,0x03b,0x093,0x0eb,0x143,0x19b,0x1f3,0x24b,0x2a3,0x2fb,0x353,0x3ab,0x403,0x45b,0x4b3 }, + { 0x810,0x868,0x004,0x05c,0x0b4,0x10c,0x164,0x1bc,0x214,0x26c,0x2c4,0x31c,0x374,0x3cc,0x424,0x47c,0x4d4,0x52c,0x584,0x5dc,0x634,0x68c,0x6e4,0x73c,0x794,0x7ec,0x844,0x89c,0x038,0x090,0x0e8,0x140,0x198,0x1f0,0x248,0x2a0,0x2f8,0x350,0x3a8,0x400,0x458,0x4b0,0x508 }, + { 0x811,0x869,0x005,0x05d,0x0b5,0x10d,0x165,0x1bd,0x215,0x26d,0x2c5,0x31d,0x375,0x3cd,0x425,0x47d,0x4d5,0x52d,0x585,0x5dd,0x635,0x68d,0x6e5,0x73d,0x795,0x7ed,0x845,0x89d,0x039,0x091,0x0e9,0x141,0x199,0x1f1,0x249,0x2a1,0x2f9,0x351,0x3a9,0x401,0x459,0x4b1,0x509 }, + { 0x866,0x002,0x05a,0x0b2,0x10a,0x162,0x1ba,0x212,0x26a,0x2c2,0x31a,0x372,0x3ca,0x422,0x47a,0x4d2,0x52a,0x582,0x5da,0x632,0x68a,0x6e2,0x73a,0x792,0x7ea,0x842,0x89a,0x036,0x08e,0x0e6,0x13e,0x196,0x1ee,0x246,0x29e,0x2f6,0x34e,0x3a6,0x3fe,0x456,0x4ae,0x506,0x55e }, + { 0x867,0x003,0x05b,0x0b3,0x10b,0x163,0x1bb,0x213,0x26b,0x2c3,0x31b,0x373,0x3cb,0x423,0x47b,0x4d3,0x52b,0x583,0x5db,0x633,0x68b,0x6e3,0x73b,0x793,0x7eb,0x843,0x89b,0x037,0x08f,0x0e7,0x13f,0x197,0x1ef,0x247,0x29f,0x2f7,0x34f,0x3a7,0x3ff,0x457,0x4af,0x507,0x55f } +}; + +/*------------------------------------------------- + * ecc_source_byte - return data from the sector + * at the given offset, masking anything + * particular to a mode + *------------------------------------------------- + */ + +static inline uint8_t ecc_source_byte(const uint8_t *sector, uint32_t offset) +{ + /* in mode 2 always treat these as 0 bytes */ + return (sector[MODE_OFFSET] == 2 && offset < 4) ? 0x00 : sector[SYNC_OFFSET + SYNC_NUM_BYTES + offset]; +} + +/** + * @fn void ecc_compute_bytes(const uint8_t *sector, const uint16_t *row, int rowlen, uint8_t &val1, uint8_t &val2) + * + * @brief ------------------------------------------------- + * ecc_compute_bytes - calculate an ECC value (P or Q) + * -------------------------------------------------. + * + * @param sector The sector. + * @param row The row. + * @param rowlen The rowlen. + * @param [in,out] val1 The first value. + * @param [in,out] val2 The second value. + */ + +void ecc_compute_bytes(const uint8_t *sector, const uint16_t *row, int rowlen, uint8_t *val1, uint8_t *val2) +{ + int component; + *val1 = *val2 = 0; + for (component = 0; component < rowlen; component++) + { + *val1 ^= ecc_source_byte(sector, row[component]); + *val2 ^= ecc_source_byte(sector, row[component]); + *val1 = ecclow[*val1]; + } + *val1 = ecchigh[ecclow[*val1] ^ *val2]; + *val2 ^= *val1; +} + +/** + * @fn int ecc_verify(const uint8_t *sector) + * + * @brief ------------------------------------------------- + * ecc_verify - verify the P and Q ECC codes in a sector + * -------------------------------------------------. + * + * @param sector The sector. + * + * @return true if it succeeds, false if it fails. + */ + +int ecc_verify(const uint8_t *sector) +{ + int byte; + /* first verify P bytes */ + for (byte = 0; byte < ECC_P_NUM_BYTES; byte++) + { + uint8_t val1, val2; + ecc_compute_bytes(sector, poffsets[byte], ECC_P_COMP, &val1, &val2); + if (sector[ECC_P_OFFSET + byte] != val1 || sector[ECC_P_OFFSET + ECC_P_NUM_BYTES + byte] != val2) + return 0; + } + + /* then verify Q bytes */ + for (byte = 0; byte < ECC_Q_NUM_BYTES; byte++) + { + uint8_t val1, val2; + ecc_compute_bytes(sector, qoffsets[byte], ECC_Q_COMP, &val1, &val2); + if (sector[ECC_Q_OFFSET + byte] != val1 || sector[ECC_Q_OFFSET + ECC_Q_NUM_BYTES + byte] != val2) + return 0; + } + return 1; +} + +/** + * @fn void ecc_generate(uint8_t *sector) + * + * @brief ------------------------------------------------- + * ecc_generate - generate the P and Q ECC codes for a sector, overwriting any + * existing codes + * -------------------------------------------------. + * + * @param [in,out] sector If non-null, the sector. + */ + +void ecc_generate(uint8_t *sector) +{ + int byte; + /* first verify P bytes */ + for (byte = 0; byte < ECC_P_NUM_BYTES; byte++) + ecc_compute_bytes(sector, poffsets[byte], ECC_P_COMP, §or[ECC_P_OFFSET + byte], §or[ECC_P_OFFSET + ECC_P_NUM_BYTES + byte]); + + /* then verify Q bytes */ + for (byte = 0; byte < ECC_Q_NUM_BYTES; byte++) + ecc_compute_bytes(sector, qoffsets[byte], ECC_Q_COMP, §or[ECC_Q_OFFSET + byte], §or[ECC_Q_OFFSET + ECC_Q_NUM_BYTES + byte]); +} + +/** + * @fn void ecc_clear(uint8_t *sector) + * + * @brief ------------------------------------------------- + * ecc_clear - erase the ECC P and Q cods to 0 within a sector + * -------------------------------------------------. + * + * @param [in,out] sector If non-null, the sector. + */ + +void ecc_clear(uint8_t *sector) +{ + memset(§or[ECC_P_OFFSET], 0, 2 * ECC_P_NUM_BYTES); + memset(§or[ECC_Q_OFFSET], 0, 2 * ECC_Q_NUM_BYTES); +} + +#endif /* WANT_RAW_DATA_SECTOR */ diff --git a/cores/saturn/deps/libchdr/src/libchdr_chd.c b/cores/saturn/deps/libchdr/src/libchdr_chd.c new file mode 100644 index 000000000..913abaa2a --- /dev/null +++ b/cores/saturn/deps/libchdr/src/libchdr_chd.c @@ -0,0 +1,2672 @@ +/*************************************************************************** + + chd.c + + MAME Compressed Hunks of Data file format + +**************************************************************************** + + Copyright Aaron Giles + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name 'MAME' nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY AARON GILES ''AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL AARON GILES BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "LzmaEnc.h" +#include "LzmaDec.h" +#include "zlib.h" + +#undef TRUE +#undef FALSE +#define TRUE 1 +#define FALSE 0 + +#undef MAX +#undef MIN +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) + +#define SHA1_DIGEST_SIZE 20 + +/*************************************************************************** + DEBUGGING +***************************************************************************/ + +#define PRINTF_MAX_HUNK (0) + +/*************************************************************************** + CONSTANTS +***************************************************************************/ + +#define MAP_STACK_ENTRIES 512 /* max number of entries to use on the stack */ +#define MAP_ENTRY_SIZE 16 /* V3 and later */ +#define OLD_MAP_ENTRY_SIZE 8 /* V1-V2 */ +#define METADATA_HEADER_SIZE 16 /* metadata header size */ + +#define MAP_ENTRY_FLAG_TYPE_MASK 0x0f /* what type of hunk */ +#define MAP_ENTRY_FLAG_NO_CRC 0x10 /* no CRC is present */ + +#define CHD_V1_SECTOR_SIZE 512 /* size of a "sector" in the V1 header */ + +#define COOKIE_VALUE 0xbaadf00d +#define MAX_ZLIB_ALLOCS 64 + +#define END_OF_LIST_COOKIE "EndOfListCookie" + +#define NO_MATCH (~0) + +#ifdef WANT_RAW_DATA_SECTOR +static const uint8_t s_cd_sync_header[12] = { 0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 }; +#endif + +/* V3-V4 entry types */ +enum +{ + V34_MAP_ENTRY_TYPE_INVALID = 0, /* invalid type */ + V34_MAP_ENTRY_TYPE_COMPRESSED = 1, /* standard compression */ + V34_MAP_ENTRY_TYPE_UNCOMPRESSED = 2, /* uncompressed data */ + V34_MAP_ENTRY_TYPE_MINI = 3, /* mini: use offset as raw data */ + V34_MAP_ENTRY_TYPE_SELF_HUNK = 4, /* same as another hunk in this file */ + V34_MAP_ENTRY_TYPE_PARENT_HUNK = 5, /* same as a hunk in the parent file */ + V34_MAP_ENTRY_TYPE_2ND_COMPRESSED = 6 /* compressed with secondary algorithm (usually FLAC CDDA) */ +}; + +/* V5 compression types */ +enum +{ + /* codec #0 + * these types are live when running */ + COMPRESSION_TYPE_0 = 0, + /* codec #1 */ + COMPRESSION_TYPE_1 = 1, + /* codec #2 */ + COMPRESSION_TYPE_2 = 2, + /* codec #3 */ + COMPRESSION_TYPE_3 = 3, + /* no compression; implicit length = hunkbytes */ + COMPRESSION_NONE = 4, + /* same as another block in this chd */ + COMPRESSION_SELF = 5, + /* same as a hunk's worth of units in the parent chd */ + COMPRESSION_PARENT = 6, + + /* start of small RLE run (4-bit length) + * these additional pseudo-types are used for compressed encodings: */ + COMPRESSION_RLE_SMALL, + /* start of large RLE run (8-bit length) */ + COMPRESSION_RLE_LARGE, + /* same as the last COMPRESSION_SELF block */ + COMPRESSION_SELF_0, + /* same as the last COMPRESSION_SELF block + 1 */ + COMPRESSION_SELF_1, + /* same block in the parent */ + COMPRESSION_PARENT_SELF, + /* same as the last COMPRESSION_PARENT block */ + COMPRESSION_PARENT_0, + /* same as the last COMPRESSION_PARENT block + 1 */ + COMPRESSION_PARENT_1 +}; + +/*************************************************************************** + MACROS +***************************************************************************/ + +#define EARLY_EXIT(x) do { (void)(x); goto cleanup; } while (0) + +/*************************************************************************** + TYPE DEFINITIONS +***************************************************************************/ + +/* interface to a codec */ +typedef struct _codec_interface codec_interface; +struct _codec_interface +{ + UINT32 compression; /* type of compression */ + const char *compname; /* name of the algorithm */ + UINT8 lossy; /* is this a lossy algorithm? */ + chd_error (*init)(void *codec, UINT32 hunkbytes); /* codec initialize */ + void (*free)(void *codec); /* codec free */ + chd_error (*decompress)(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen); /* decompress data */ + chd_error (*config)(void *codec, int param, void *config); /* configure */ +}; + +/* a single map entry */ +typedef struct _map_entry map_entry; +struct _map_entry +{ + UINT64 offset; /* offset within the file of the data */ + UINT32 crc; /* 32-bit CRC of the data */ + UINT32 length; /* length of the data */ + UINT8 flags; /* misc flags */ +}; + +/* a single metadata entry */ +typedef struct _metadata_entry metadata_entry; +struct _metadata_entry +{ + UINT64 offset; /* offset within the file of the header */ + UINT64 next; /* offset within the file of the next header */ + UINT64 prev; /* offset within the file of the previous header */ + UINT32 length; /* length of the metadata */ + UINT32 metatag; /* metadata tag */ + UINT8 flags; /* flag bits */ +}; + +/* codec-private data for the ZLIB codec */ + +typedef struct _zlib_allocator zlib_allocator; +struct _zlib_allocator +{ + UINT32 * allocptr[MAX_ZLIB_ALLOCS]; + UINT32 * allocptr2[MAX_ZLIB_ALLOCS]; +}; + +typedef struct _zlib_codec_data zlib_codec_data; +struct _zlib_codec_data +{ + z_stream inflater; + zlib_allocator allocator; +}; + +/* codec-private data for the LZMA codec */ +#define MAX_LZMA_ALLOCS 64 + +typedef struct _lzma_allocator lzma_allocator; +struct _lzma_allocator +{ + void *(*Alloc)(void *p, size_t size); + void (*Free)(void *p, void *address); /* address can be 0 */ + void (*FreeSz)(void *p, void *address, size_t size); /* address can be 0 */ + uint32_t* allocptr[MAX_LZMA_ALLOCS]; + uint32_t* allocptr2[MAX_LZMA_ALLOCS]; +}; + +typedef struct _lzma_codec_data lzma_codec_data; +struct _lzma_codec_data +{ + CLzmaDec decoder; + lzma_allocator allocator; +}; + +/* codec-private data for the CDZL codec */ +typedef struct _cdzl_codec_data cdzl_codec_data; +struct _cdzl_codec_data { + /* internal state */ + zlib_codec_data base_decompressor; +#ifdef WANT_SUBCODE + zlib_codec_data subcode_decompressor; +#endif + uint8_t* buffer; +}; + +/* codec-private data for the CDLZ codec */ +typedef struct _cdlz_codec_data cdlz_codec_data; +struct _cdlz_codec_data { + /* internal state */ + lzma_codec_data base_decompressor; +#ifdef WANT_SUBCODE + zlib_codec_data subcode_decompressor; +#endif + uint8_t* buffer; +}; + +/* codec-private data for the CDFL codec */ +typedef struct _cdfl_codec_data cdfl_codec_data; +struct _cdfl_codec_data { + /* internal state */ + int swap_endian; + flac_decoder decoder; +#ifdef WANT_SUBCODE + zlib_codec_data subcode_decompressor; +#endif + uint8_t* buffer; +}; + +/* internal representation of an open CHD file */ +struct _chd_file +{ + UINT32 cookie; /* cookie, should equal COOKIE_VALUE */ + + core_file * file; /* handle to the open core file */ + UINT8 owns_file; /* flag indicating if this file should be closed on chd_close() */ + chd_header header; /* header, extracted from file */ + + chd_file * parent; /* pointer to parent file, or NULL */ + + map_entry * map; /* array of map entries */ + +#ifdef NEED_CACHE_HUNK + UINT8 * cache; /* hunk cache pointer */ + UINT32 cachehunk; /* index of currently cached hunk */ + + UINT8 * compare; /* hunk compare pointer */ + UINT32 comparehunk; /* index of current compare data */ +#endif + + UINT8 * compressed; /* pointer to buffer for compressed data */ + const codec_interface * codecintf[4]; /* interface to the codec */ + + zlib_codec_data zlib_codec_data; /* zlib codec data */ + cdzl_codec_data cdzl_codec_data; /* cdzl codec data */ + cdlz_codec_data cdlz_codec_data; /* cdlz codec data */ + cdfl_codec_data cdfl_codec_data; /* cdfl codec data */ + +#ifdef NEED_CACHE_HUNK + UINT32 maxhunk; /* maximum hunk accessed */ +#endif + + UINT8 * file_cache; /* cache of underlying file */ +}; + + +/*************************************************************************** + GLOBAL VARIABLES +***************************************************************************/ + +static const UINT8 nullmd5[CHD_MD5_BYTES] = { 0 }; +static const UINT8 nullsha1[CHD_SHA1_BYTES] = { 0 }; + +/*************************************************************************** + PROTOTYPES +***************************************************************************/ + +/* internal header operations */ +static chd_error header_validate(const chd_header *header); +static chd_error header_read(chd_file *chd, chd_header *header); + +/* internal hunk read/write */ +#ifdef NEED_CACHE_HUNK +static chd_error hunk_read_into_cache(chd_file *chd, UINT32 hunknum); +#endif +static chd_error hunk_read_into_memory(chd_file *chd, UINT32 hunknum, UINT8 *dest); + +/* internal map access */ +static chd_error map_read(chd_file *chd); + +/* metadata management */ +static chd_error metadata_find_entry(chd_file *chd, UINT32 metatag, UINT32 metaindex, metadata_entry *metaentry); + +/* zlib compression codec */ +static chd_error zlib_codec_init(void *codec, uint32_t hunkbytes); +static void zlib_codec_free(void *codec); +static chd_error zlib_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen); +static voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size); +static void zlib_fast_free(voidpf opaque, voidpf address); +static void zlib_allocator_free(voidpf opaque); + +/* lzma compression codec */ +static chd_error lzma_codec_init(void *codec, uint32_t hunkbytes); +static void lzma_codec_free(void *codec); +static chd_error lzma_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen); + +/* cdzl compression codec */ +static chd_error cdzl_codec_init(void* codec, uint32_t hunkbytes); +static void cdzl_codec_free(void* codec); +static chd_error cdzl_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen); + +/* cdlz compression codec */ +static chd_error cdlz_codec_init(void* codec, uint32_t hunkbytes); +static void cdlz_codec_free(void* codec); +static chd_error cdlz_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen); + +/* cdfl compression codec */ +static chd_error cdfl_codec_init(void* codec, uint32_t hunkbytes); +static void cdfl_codec_free(void* codec); +static chd_error cdfl_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen); + +/*************************************************************************** + * LZMA ALLOCATOR HELPER + *************************************************************************** + */ + +static void *lzma_fast_alloc(void *p, size_t size); +static void lzma_fast_free(void *p, void *address); + +/*------------------------------------------------- + * lzma_allocator_init + *------------------------------------------------- + */ + +static void lzma_allocator_init(void* p) +{ + lzma_allocator *codec = (lzma_allocator *)(p); + + /* reset pointer list */ + memset(codec->allocptr, 0, sizeof(codec->allocptr)); + memset(codec->allocptr2, 0, sizeof(codec->allocptr2)); + codec->Alloc = lzma_fast_alloc; + codec->Free = lzma_fast_free; +} + +/*------------------------------------------------- + * lzma_allocator_free + *------------------------------------------------- + */ + +static void lzma_allocator_free(void* p ) +{ + int i; + lzma_allocator *codec = (lzma_allocator *)(p); + + /* free our memory */ + for (i = 0 ; i < MAX_LZMA_ALLOCS ; i++) + { + if (codec->allocptr[i] != NULL) + free(codec->allocptr[i]); + } +} + +/*------------------------------------------------- + * lzma_fast_alloc - fast malloc for lzma, which + * allocates and frees memory frequently + *------------------------------------------------- + */ + +/* Huge alignment values for possible SIMD optimization by compiler (NEON, SSE, AVX) */ +#define LZMA_MIN_ALIGNMENT_BITS 512 +#define LZMA_MIN_ALIGNMENT_BYTES (LZMA_MIN_ALIGNMENT_BITS / 8) + +static void *lzma_fast_alloc(void *p, size_t size) +{ + int scan; + uint32_t *addr = NULL; + lzma_allocator *codec = (lzma_allocator *)(p); + uintptr_t vaddr = 0; + + /* compute the size, rounding to the nearest 1k */ + size = (size + 0x3ff) & ~0x3ff; + + /* reuse a hunk if we can */ + for (scan = 0; scan < MAX_LZMA_ALLOCS; scan++) + { + uint32_t *ptr = codec->allocptr[scan]; + if (ptr != NULL && size == *ptr) + { + /* set the low bit of the size so we don't match next time */ + *ptr |= 1; + + /* return aligned address of the block */ + return codec->allocptr2[scan]; + } + } + + /* alloc a new one and put it into the list */ + addr = (uint32_t *)malloc(size + sizeof(uint32_t) + LZMA_MIN_ALIGNMENT_BYTES); + if (addr==NULL) + return NULL; + for (scan = 0; scan < MAX_LZMA_ALLOCS; scan++) + { + if (codec->allocptr[scan] == NULL) + { + /* store block address */ + codec->allocptr[scan] = addr; + + /* compute aligned address, store it */ + vaddr = (uintptr_t)addr; + vaddr = (vaddr + sizeof(uint32_t) + (LZMA_MIN_ALIGNMENT_BYTES-1)) & (~(LZMA_MIN_ALIGNMENT_BYTES-1)); + codec->allocptr2[scan] = (uint32_t*)vaddr; + break; + } + } + + /* set the low bit of the size so we don't match next time */ + *addr = size | 1; + + /* return aligned address */ + return (void*)vaddr; +} + +/*------------------------------------------------- + * lzma_fast_free - fast free for lzma, which + * allocates and frees memory frequently + *------------------------------------------------- + */ + +static void lzma_fast_free(void *p, void *address) +{ + int scan; + uint32_t *ptr = NULL; + lzma_allocator *codec = NULL; + + if (address == NULL) + return; + + codec = (lzma_allocator *)(p); + + /* find the hunk */ + ptr = (uint32_t *)address; + for (scan = 0; scan < MAX_LZMA_ALLOCS; scan++) + { + if (ptr == codec->allocptr2[scan]) + { + /* clear the low bit of the size to allow matches */ + *codec->allocptr[scan] &= ~1; + return; + } + } +} + +/*************************************************************************** + * LZMA DECOMPRESSOR + *************************************************************************** + */ + +/*------------------------------------------------- + * lzma_codec_init - constructor + *------------------------------------------------- + */ + +static chd_error lzma_codec_init(void* codec, uint32_t hunkbytes) +{ + CLzmaEncHandle enc; + CLzmaEncProps encoder_props; + Byte decoder_props[LZMA_PROPS_SIZE]; + SizeT props_size; + lzma_allocator* alloc; + lzma_codec_data* lzma_codec = (lzma_codec_data*) codec; + + /* construct the decoder */ + LzmaDec_Construct(&lzma_codec->decoder); + + /* FIXME: this code is written in a way that makes it impossible to safely upgrade the LZMA SDK + * This code assumes that the current version of the encoder imposes the same requirements on the + * decoder as the encoder used to produce the file. This is not necessarily true. The format + * needs to be changed so the encoder properties are written to the file. + + * configure the properties like the compressor did */ + LzmaEncProps_Init(&encoder_props); + encoder_props.level = 9; + encoder_props.reduceSize = hunkbytes; + LzmaEncProps_Normalize(&encoder_props); + + /* convert to decoder properties */ + alloc = &lzma_codec->allocator; + lzma_allocator_init(alloc); + enc = LzmaEnc_Create((ISzAlloc*)alloc); + if (!enc) + return CHDERR_DECOMPRESSION_ERROR; + if (LzmaEnc_SetProps(enc, &encoder_props) != SZ_OK) + { + LzmaEnc_Destroy(enc, (ISzAlloc*)&alloc, (ISzAlloc*)&alloc); + return CHDERR_DECOMPRESSION_ERROR; + } + props_size = sizeof(decoder_props); + if (LzmaEnc_WriteProperties(enc, decoder_props, &props_size) != SZ_OK) + { + LzmaEnc_Destroy(enc, (ISzAlloc*)alloc, (ISzAlloc*)alloc); + return CHDERR_DECOMPRESSION_ERROR; + } + LzmaEnc_Destroy(enc, (ISzAlloc*)alloc, (ISzAlloc*)alloc); + + /* do memory allocations */ + if (LzmaDec_Allocate(&lzma_codec->decoder, decoder_props, LZMA_PROPS_SIZE, (ISzAlloc*)alloc) != SZ_OK) + return CHDERR_DECOMPRESSION_ERROR; + + /* Okay */ + return CHDERR_NONE; +} + +/*------------------------------------------------- + * lzma_codec_free + *------------------------------------------------- + */ + +static void lzma_codec_free(void* codec) +{ + lzma_codec_data* lzma_codec = (lzma_codec_data*) codec; + + /* free memory */ + LzmaDec_Free(&lzma_codec->decoder, (ISzAlloc*)&lzma_codec->allocator); + lzma_allocator_free(&lzma_codec->allocator); +} + +/*------------------------------------------------- + * decompress - decompress data using the LZMA + * codec + *------------------------------------------------- + */ + +static chd_error lzma_codec_decompress(void* codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen) +{ + ELzmaStatus status; + SRes res; + SizeT consumedlen, decodedlen; + /* initialize */ + lzma_codec_data* lzma_codec = (lzma_codec_data*) codec; + LzmaDec_Init(&lzma_codec->decoder); + + /* decode */ + consumedlen = complen; + decodedlen = destlen; + res = LzmaDec_DecodeToBuf(&lzma_codec->decoder, dest, &decodedlen, src, &consumedlen, LZMA_FINISH_END, &status); + if ((res != SZ_OK && res != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK) || consumedlen != complen || decodedlen != destlen) + return CHDERR_DECOMPRESSION_ERROR; + return CHDERR_NONE; +} + +/* cdlz */ +static chd_error cdlz_codec_init(void* codec, uint32_t hunkbytes) +{ + chd_error ret; + cdlz_codec_data* cdlz = (cdlz_codec_data*) codec; + + /* allocate buffer */ + cdlz->buffer = (uint8_t*)malloc(sizeof(uint8_t) * hunkbytes); + if (cdlz->buffer == NULL) + return CHDERR_OUT_OF_MEMORY; + + /* make sure the CHD's hunk size is an even multiple of the frame size */ + ret = lzma_codec_init(&cdlz->base_decompressor, (hunkbytes / CD_FRAME_SIZE) * CD_MAX_SECTOR_DATA); + if (ret != CHDERR_NONE) + return ret; + +#ifdef WANT_SUBCODE + ret = zlib_codec_init(&cdlz->subcode_decompressor, (hunkbytes / CD_FRAME_SIZE) * CD_MAX_SUBCODE_DATA); + if (ret != CHDERR_NONE) + return ret; +#endif + + if (hunkbytes % CD_FRAME_SIZE != 0) + return CHDERR_CODEC_ERROR; + + return CHDERR_NONE; +} + +static void cdlz_codec_free(void* codec) +{ + cdlz_codec_data* cdlz = (cdlz_codec_data*) codec; + free(cdlz->buffer); + lzma_codec_free(&cdlz->base_decompressor); +#ifdef WANT_SUBCODE + zlib_codec_free(&cdlz->subcode_decompressor); +#endif +} + +static chd_error cdlz_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen) +{ + uint32_t framenum; + cdlz_codec_data* cdlz = (cdlz_codec_data*)codec; + + /* determine header bytes */ + uint32_t frames = destlen / CD_FRAME_SIZE; + uint32_t complen_bytes = (destlen < 65536) ? 2 : 3; + uint32_t ecc_bytes = (frames + 7) / 8; + uint32_t header_bytes = ecc_bytes + complen_bytes; + + /* extract compressed length of base */ + uint32_t complen_base = (src[ecc_bytes + 0] << 8) | src[ecc_bytes + 1]; + if (complen_bytes > 2) + complen_base = (complen_base << 8) | src[ecc_bytes + 2]; + + /* reset and decode */ + lzma_codec_decompress(&cdlz->base_decompressor, &src[header_bytes], complen_base, &cdlz->buffer[0], frames * CD_MAX_SECTOR_DATA); +#ifdef WANT_SUBCODE + zlib_codec_decompress(&cdlz->subcode_decompressor, &src[header_bytes + complen_base], complen - complen_base - header_bytes, &cdlz->buffer[frames * CD_MAX_SECTOR_DATA], frames * CD_MAX_SUBCODE_DATA); +#endif + + /* reassemble the data */ + for (framenum = 0; framenum < frames; framenum++) + { + uint8_t *sector; + + memcpy(&dest[framenum * CD_FRAME_SIZE], &cdlz->buffer[framenum * CD_MAX_SECTOR_DATA], CD_MAX_SECTOR_DATA); +#ifdef WANT_SUBCODE + memcpy(&dest[framenum * CD_FRAME_SIZE + CD_MAX_SECTOR_DATA], &cdlz->buffer[frames * CD_MAX_SECTOR_DATA + framenum * CD_MAX_SUBCODE_DATA], CD_MAX_SUBCODE_DATA); +#endif + +#ifdef WANT_RAW_DATA_SECTOR + /* reconstitute the ECC data and sync header */ + sector = (uint8_t *)&dest[framenum * CD_FRAME_SIZE]; + if ((src[framenum / 8] & (1 << (framenum % 8))) != 0) + { + memcpy(sector, s_cd_sync_header, sizeof(s_cd_sync_header)); + ecc_generate(sector); + } +#endif + } + return CHDERR_NONE; +} + +/* cdzl */ + +static chd_error cdzl_codec_init(void *codec, uint32_t hunkbytes) +{ + chd_error ret; + cdzl_codec_data* cdzl = (cdzl_codec_data*)codec; + + /* make sure the CHD's hunk size is an even multiple of the frame size */ + if (hunkbytes % CD_FRAME_SIZE != 0) + return CHDERR_CODEC_ERROR; + + cdzl->buffer = (uint8_t*)malloc(sizeof(uint8_t) * hunkbytes); + if (cdzl->buffer == NULL) + return CHDERR_OUT_OF_MEMORY; + + ret = zlib_codec_init(&cdzl->base_decompressor, (hunkbytes / CD_FRAME_SIZE) * CD_MAX_SECTOR_DATA); + if (ret != CHDERR_NONE) + return ret; + +#ifdef WANT_SUBCODE + ret = zlib_codec_init(&cdzl->subcode_decompressor, (hunkbytes / CD_FRAME_SIZE) * CD_MAX_SUBCODE_DATA); + if (ret != CHDERR_NONE) + return ret; +#endif + + return CHDERR_NONE; +} + +static void cdzl_codec_free(void *codec) +{ + cdzl_codec_data* cdzl = (cdzl_codec_data*)codec; + zlib_codec_free(&cdzl->base_decompressor); +#ifdef WANT_SUBCODE + zlib_codec_free(&cdzl->subcode_decompressor); +#endif + free(cdzl->buffer); +} + +static chd_error cdzl_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen) +{ + uint32_t framenum; + cdzl_codec_data* cdzl = (cdzl_codec_data*)codec; + + /* determine header bytes */ + uint32_t frames = destlen / CD_FRAME_SIZE; + uint32_t complen_bytes = (destlen < 65536) ? 2 : 3; + uint32_t ecc_bytes = (frames + 7) / 8; + uint32_t header_bytes = ecc_bytes + complen_bytes; + + /* extract compressed length of base */ + uint32_t complen_base = (src[ecc_bytes + 0] << 8) | src[ecc_bytes + 1]; + if (complen_bytes > 2) + complen_base = (complen_base << 8) | src[ecc_bytes + 2]; + + /* reset and decode */ + zlib_codec_decompress(&cdzl->base_decompressor, &src[header_bytes], complen_base, &cdzl->buffer[0], frames * CD_MAX_SECTOR_DATA); +#ifdef WANT_SUBCODE + zlib_codec_decompress(&cdzl->subcode_decompressor, &src[header_bytes + complen_base], complen - complen_base - header_bytes, &cdzl->buffer[frames * CD_MAX_SECTOR_DATA], frames * CD_MAX_SUBCODE_DATA); +#endif + + /* reassemble the data */ + for (framenum = 0; framenum < frames; framenum++) + { + uint8_t *sector; + + memcpy(&dest[framenum * CD_FRAME_SIZE], &cdzl->buffer[framenum * CD_MAX_SECTOR_DATA], CD_MAX_SECTOR_DATA); +#ifdef WANT_SUBCODE + memcpy(&dest[framenum * CD_FRAME_SIZE + CD_MAX_SECTOR_DATA], &cdzl->buffer[frames * CD_MAX_SECTOR_DATA + framenum * CD_MAX_SUBCODE_DATA], CD_MAX_SUBCODE_DATA); +#endif + +#ifdef WANT_RAW_DATA_SECTOR + /* reconstitute the ECC data and sync header */ + sector = (uint8_t *)&dest[framenum * CD_FRAME_SIZE]; + if ((src[framenum / 8] & (1 << (framenum % 8))) != 0) + { + memcpy(sector, s_cd_sync_header, sizeof(s_cd_sync_header)); + ecc_generate(sector); + } +#endif + } + return CHDERR_NONE; +} + +/*************************************************************************** + * CD FLAC DECOMPRESSOR + *************************************************************************** + */ + +/*------------------------------------------------------ + * cdfl_codec_blocksize - return the optimal block size + *------------------------------------------------------ + */ + +static uint32_t cdfl_codec_blocksize(uint32_t bytes) +{ + /* determine FLAC block size, which must be 16-65535 + * clamp to 2k since that's supposed to be the sweet spot */ + uint32_t hunkbytes = bytes / 4; + while (hunkbytes > 2048) + hunkbytes /= 2; + return hunkbytes; +} + +static chd_error cdfl_codec_init(void *codec, uint32_t hunkbytes) +{ +#ifdef WANT_SUBCODE + chd_error ret; +#endif + uint16_t native_endian = 0; + cdfl_codec_data *cdfl = (cdfl_codec_data*)codec; + + /* make sure the CHD's hunk size is an even multiple of the frame size */ + if (hunkbytes % CD_FRAME_SIZE != 0) + return CHDERR_CODEC_ERROR; + + cdfl->buffer = (uint8_t*)malloc(sizeof(uint8_t) * hunkbytes); + if (cdfl->buffer == NULL) + return CHDERR_OUT_OF_MEMORY; + + /* determine whether we want native or swapped samples */ + *(uint8_t *)(&native_endian) = 1; + cdfl->swap_endian = (native_endian & 1); + +#ifdef WANT_SUBCODE + /* init zlib inflater */ + ret = zlib_codec_init(&cdfl->subcode_decompressor, (hunkbytes / CD_FRAME_SIZE) * CD_MAX_SECTOR_DATA); + if (ret != CHDERR_NONE) + return ret; +#endif + + /* flac decoder init */ + if (flac_decoder_init(&cdfl->decoder)) + return CHDERR_OUT_OF_MEMORY; + + return CHDERR_NONE; +} + +static void cdfl_codec_free(void *codec) +{ + cdfl_codec_data *cdfl = (cdfl_codec_data*)codec; + flac_decoder_free(&cdfl->decoder); +#ifdef WANT_SUBCODE + zlib_codec_free(&cdfl->subcode_decompressor); +#endif + if (cdfl->buffer) + free(cdfl->buffer); +} + +static chd_error cdfl_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen) +{ + uint32_t framenum; + uint8_t *buffer; +#ifdef WANT_SUBCODE + uint32_t offset; + chd_error ret; +#endif + cdfl_codec_data *cdfl = (cdfl_codec_data*)codec; + + /* reset and decode */ + uint32_t frames = destlen / CD_FRAME_SIZE; + + if (!flac_decoder_reset(&cdfl->decoder, 44100, 2, cdfl_codec_blocksize(frames * CD_MAX_SECTOR_DATA), src, complen)) + return CHDERR_DECOMPRESSION_ERROR; + buffer = &cdfl->buffer[0]; + if (!flac_decoder_decode_interleaved(&cdfl->decoder, (int16_t *)(buffer), frames * CD_MAX_SECTOR_DATA/4, cdfl->swap_endian)) + return CHDERR_DECOMPRESSION_ERROR; + +#ifdef WANT_SUBCODE + /* inflate the subcode data */ + offset = flac_decoder_finish(&cdfl->decoder); + ret = zlib_codec_decompress(&cdfl->subcode_decompressor, src + offset, complen - offset, &cdfl->buffer[frames * CD_MAX_SECTOR_DATA], frames * CD_MAX_SUBCODE_DATA); + if (ret != CHDERR_NONE) + return ret; +#else + flac_decoder_finish(&cdfl->decoder); +#endif + + /* reassemble the data */ + for (framenum = 0; framenum < frames; framenum++) + { + memcpy(&dest[framenum * CD_FRAME_SIZE], &cdfl->buffer[framenum * CD_MAX_SECTOR_DATA], CD_MAX_SECTOR_DATA); +#ifdef WANT_SUBCODE + memcpy(&dest[framenum * CD_FRAME_SIZE + CD_MAX_SECTOR_DATA], &cdfl->buffer[frames * CD_MAX_SECTOR_DATA + framenum * CD_MAX_SUBCODE_DATA], CD_MAX_SUBCODE_DATA); +#endif + } + + return CHDERR_NONE; +} +/*************************************************************************** + CODEC INTERFACES +***************************************************************************/ + +static const codec_interface codec_interfaces[] = +{ + /* "none" or no compression */ + { + CHDCOMPRESSION_NONE, + "none", + FALSE, + NULL, + NULL, + NULL, + NULL + }, + + /* standard zlib compression */ + { + CHDCOMPRESSION_ZLIB, + "zlib", + FALSE, + zlib_codec_init, + zlib_codec_free, + zlib_codec_decompress, + NULL + }, + + /* zlib+ compression */ + { + CHDCOMPRESSION_ZLIB_PLUS, + "zlib+", + FALSE, + zlib_codec_init, + zlib_codec_free, + zlib_codec_decompress, + NULL + }, + + /* V5 zlib compression */ + { + CHD_CODEC_ZLIB, + "zlib (Deflate)", + FALSE, + zlib_codec_init, + zlib_codec_free, + zlib_codec_decompress, + NULL + }, + + /* V5 CD zlib compression */ + { + CHD_CODEC_CD_ZLIB, + "cdzl (CD Deflate)", + FALSE, + cdzl_codec_init, + cdzl_codec_free, + cdzl_codec_decompress, + NULL + }, + + /* V5 CD lzma compression */ + { + CHD_CODEC_CD_LZMA, + "cdlz (CD LZMA)", + FALSE, + cdlz_codec_init, + cdlz_codec_free, + cdlz_codec_decompress, + NULL + }, + + /* V5 CD flac compression */ + { + CHD_CODEC_CD_FLAC, + "cdfl (CD FLAC)", + FALSE, + cdfl_codec_init, + cdfl_codec_free, + cdfl_codec_decompress, + NULL + }, +}; + +/*************************************************************************** + INLINE FUNCTIONS +***************************************************************************/ + +/*------------------------------------------------- + get_bigendian_uint64 - fetch a UINT64 from + the data stream in bigendian order +-------------------------------------------------*/ + +static inline UINT64 get_bigendian_uint64(const UINT8 *base) +{ + return ((UINT64)base[0] << 56) | ((UINT64)base[1] << 48) | ((UINT64)base[2] << 40) | ((UINT64)base[3] << 32) | + ((UINT64)base[4] << 24) | ((UINT64)base[5] << 16) | ((UINT64)base[6] << 8) | (UINT64)base[7]; +} + +/*------------------------------------------------- + put_bigendian_uint64 - write a UINT64 to + the data stream in bigendian order +-------------------------------------------------*/ + +static inline void put_bigendian_uint64(UINT8 *base, UINT64 value) +{ + base[0] = value >> 56; + base[1] = value >> 48; + base[2] = value >> 40; + base[3] = value >> 32; + base[4] = value >> 24; + base[5] = value >> 16; + base[6] = value >> 8; + base[7] = value; +} + +/*------------------------------------------------- + get_bigendian_uint48 - fetch a UINT48 from + the data stream in bigendian order +-------------------------------------------------*/ + +static inline UINT64 get_bigendian_uint48(const UINT8 *base) +{ + return ((UINT64)base[0] << 40) | ((UINT64)base[1] << 32) | + ((UINT64)base[2] << 24) | ((UINT64)base[3] << 16) | ((UINT64)base[4] << 8) | (UINT64)base[5]; +} + +/*------------------------------------------------- + put_bigendian_uint48 - write a UINT48 to + the data stream in bigendian order +-------------------------------------------------*/ + +static inline void put_bigendian_uint48(UINT8 *base, UINT64 value) +{ + value &= 0xffffffffffff; + base[0] = value >> 40; + base[1] = value >> 32; + base[2] = value >> 24; + base[3] = value >> 16; + base[4] = value >> 8; + base[5] = value; +} +/*------------------------------------------------- + get_bigendian_uint32 - fetch a UINT32 from + the data stream in bigendian order +-------------------------------------------------*/ + +static inline UINT32 get_bigendian_uint32(const UINT8 *base) +{ + return (base[0] << 24) | (base[1] << 16) | (base[2] << 8) | base[3]; +} + +/*------------------------------------------------- + put_bigendian_uint32 - write a UINT32 to + the data stream in bigendian order +-------------------------------------------------*/ + +static inline void put_bigendian_uint32(UINT8 *base, UINT32 value) +{ + base[0] = value >> 24; + base[1] = value >> 16; + base[2] = value >> 8; + base[3] = value; +} + +/*------------------------------------------------- + put_bigendian_uint24 - write a UINT24 to + the data stream in bigendian order +-------------------------------------------------*/ + +static inline void put_bigendian_uint24(UINT8 *base, UINT32 value) +{ + value &= 0xffffff; + base[0] = value >> 16; + base[1] = value >> 8; + base[2] = value; +} + +/*------------------------------------------------- + get_bigendian_uint24 - fetch a UINT24 from + the data stream in bigendian order +-------------------------------------------------*/ + +static inline UINT32 get_bigendian_uint24(const UINT8 *base) +{ + return (base[0] << 16) | (base[1] << 8) | base[2]; +} + +/*------------------------------------------------- + get_bigendian_uint16 - fetch a UINT16 from + the data stream in bigendian order +-------------------------------------------------*/ + +static inline UINT16 get_bigendian_uint16(const UINT8 *base) +{ + return (base[0] << 8) | base[1]; +} + +/*------------------------------------------------- + put_bigendian_uint16 - write a UINT16 to + the data stream in bigendian order +-------------------------------------------------*/ + +static inline void put_bigendian_uint16(UINT8 *base, UINT16 value) +{ + base[0] = value >> 8; + base[1] = value; +} + +/*------------------------------------------------- + map_extract - extract a single map + entry from the datastream +-------------------------------------------------*/ + +static inline void map_extract(const UINT8 *base, map_entry *entry) +{ + entry->offset = get_bigendian_uint64(&base[0]); + entry->crc = get_bigendian_uint32(&base[8]); + entry->length = get_bigendian_uint16(&base[12]) | (base[14] << 16); + entry->flags = base[15]; +} + +/*------------------------------------------------- + map_assemble - write a single map + entry to the datastream +-------------------------------------------------*/ + +static inline void map_assemble(UINT8 *base, map_entry *entry) +{ + put_bigendian_uint64(&base[0], entry->offset); + put_bigendian_uint32(&base[8], entry->crc); + put_bigendian_uint16(&base[12], entry->length); + base[14] = entry->length >> 16; + base[15] = entry->flags; +} + +/*------------------------------------------------- + map_size_v5 - calculate CHDv5 map size +-------------------------------------------------*/ +static inline int map_size_v5(chd_header* header) +{ + return header->hunkcount * header->mapentrybytes; +} + +/*------------------------------------------------- + crc16 - calculate CRC16 (from hashing.cpp) +-------------------------------------------------*/ +uint16_t crc16(const void *data, uint32_t length) +{ + uint16_t crc = 0xffff; + + static const uint16_t s_table[256] = + { + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, + 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, + 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, + 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, + 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, + 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, + 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4, + 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, + 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823, + 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, + 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12, + 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, + 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41, + 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, + 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70, + 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, + 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, + 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, + 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, + 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256, + 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d, + 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, + 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c, + 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634, + 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, + 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3, + 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a, + 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92, + 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9, + 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1, + 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, + 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0 + }; + + const uint8_t *src = (uint8_t*)data; + + /* fetch the current value into a local and rip through the source data */ + while (length-- != 0) + crc = (crc << 8) ^ s_table[(crc >> 8) ^ *src++]; + return crc; +} + +/*------------------------------------------------- + compressed - test if CHD file is compressed ++-------------------------------------------------*/ +static inline int chd_compressed(chd_header* header) { + return header->compression[0] != CHD_CODEC_NONE; +} + +/*------------------------------------------------- + decompress_v5_map - decompress the v5 map +-------------------------------------------------*/ + +static chd_error decompress_v5_map(chd_file* chd, chd_header* header) +{ + int result = 0; + int hunknum; + int repcount = 0; + uint8_t lastcomp = 0; + uint32_t last_self = 0; + uint64_t last_parent = 0; + struct bitstream* bitbuf; + uint32_t mapbytes; + uint64_t firstoffs; + uint16_t mapcrc; + uint8_t lengthbits; + uint8_t selfbits; + uint8_t parentbits; + uint8_t *compressed_ptr; + uint8_t rawbuf[16]; + struct huffman_decoder* decoder; + enum huffman_error err; + uint64_t curoffset; + int rawmapsize = map_size_v5(header); + + if (!chd_compressed(header)) + { + header->rawmap = (uint8_t*)malloc(rawmapsize); + core_fseek(chd->file, header->mapoffset, SEEK_SET); + result = core_fread(chd->file, header->rawmap, rawmapsize); + return CHDERR_NONE; + } + + /* read the reader */ + core_fseek(chd->file, header->mapoffset, SEEK_SET); + result = core_fread(chd->file, rawbuf, sizeof(rawbuf)); + mapbytes = get_bigendian_uint32(&rawbuf[0]); + firstoffs = get_bigendian_uint48(&rawbuf[4]); + mapcrc = get_bigendian_uint16(&rawbuf[10]); + lengthbits = rawbuf[12]; + selfbits = rawbuf[13]; + parentbits = rawbuf[14]; + + /* now read the map */ + compressed_ptr = (uint8_t*)malloc(sizeof(uint8_t) * mapbytes); + core_fseek(chd->file, header->mapoffset + 16, SEEK_SET); + result = core_fread(chd->file, compressed_ptr, mapbytes); + bitbuf = create_bitstream(compressed_ptr, sizeof(uint8_t) * mapbytes); + header->rawmap = (uint8_t*)malloc(rawmapsize); + + /* first decode the compression types */ + decoder = create_huffman_decoder(16, 8); + if (decoder == NULL) + { + free(compressed_ptr); + free(bitbuf); + return CHDERR_OUT_OF_MEMORY; + } + + err = huffman_import_tree_rle(decoder, bitbuf); + if (err != HUFFERR_NONE) + { + free(compressed_ptr); + free(bitbuf); + delete_huffman_decoder(decoder); + return CHDERR_DECOMPRESSION_ERROR; + } + + for (hunknum = 0; hunknum < header->hunkcount; hunknum++) + { + uint8_t *rawmap = header->rawmap + (hunknum * 12); + if (repcount > 0) + rawmap[0] = lastcomp, repcount--; + else + { + uint8_t val = huffman_decode_one(decoder, bitbuf); + if (val == COMPRESSION_RLE_SMALL) + rawmap[0] = lastcomp, repcount = 2 + huffman_decode_one(decoder, bitbuf); + else if (val == COMPRESSION_RLE_LARGE) + rawmap[0] = lastcomp, repcount = 2 + 16 + (huffman_decode_one(decoder, bitbuf) << 4), repcount += huffman_decode_one(decoder, bitbuf); + else + rawmap[0] = lastcomp = val; + } + } + + /* then iterate through the hunks and extract the needed data */ + curoffset = firstoffs; + for (hunknum = 0; hunknum < header->hunkcount; hunknum++) + { + uint8_t *rawmap = header->rawmap + (hunknum * 12); + uint64_t offset = curoffset; + uint32_t length = 0; + uint16_t crc = 0; + switch (rawmap[0]) + { + /* base types */ + case COMPRESSION_TYPE_0: + case COMPRESSION_TYPE_1: + case COMPRESSION_TYPE_2: + case COMPRESSION_TYPE_3: + curoffset += length = bitstream_read(bitbuf, lengthbits); + crc = bitstream_read(bitbuf, 16); + break; + + case COMPRESSION_NONE: + curoffset += length = header->hunkbytes; + crc = bitstream_read(bitbuf, 16); + break; + + case COMPRESSION_SELF: + last_self = offset = bitstream_read(bitbuf, selfbits); + break; + + case COMPRESSION_PARENT: + offset = bitstream_read(bitbuf, parentbits); + last_parent = offset; + break; + + /* pseudo-types; convert into base types */ + case COMPRESSION_SELF_1: + last_self++; + case COMPRESSION_SELF_0: + rawmap[0] = COMPRESSION_SELF; + offset = last_self; + break; + + case COMPRESSION_PARENT_SELF: + rawmap[0] = COMPRESSION_PARENT; + last_parent = offset = ( ((uint64_t)hunknum) * ((uint64_t)header->hunkbytes) ) / header->unitbytes; + break; + + case COMPRESSION_PARENT_1: + last_parent += header->hunkbytes / header->unitbytes; + case COMPRESSION_PARENT_0: + rawmap[0] = COMPRESSION_PARENT; + offset = last_parent; + break; + } + /* UINT24 length */ + put_bigendian_uint24(&rawmap[1], length); + + /* UINT48 offset */ + put_bigendian_uint48(&rawmap[4], offset); + + /* crc16 */ + put_bigendian_uint16(&rawmap[10], crc); + } + + /* free memory */ + free(compressed_ptr); + free(bitbuf); + delete_huffman_decoder(decoder); + + /* verify the final CRC */ + if (crc16(&header->rawmap[0], header->hunkcount * 12) != mapcrc) + return CHDERR_DECOMPRESSION_ERROR; + + return CHDERR_NONE; +} + +/*------------------------------------------------- + map_extract_old - extract a single map + entry in old format from the datastream +-------------------------------------------------*/ + +static inline void map_extract_old(const UINT8 *base, map_entry *entry, UINT32 hunkbytes) +{ + entry->offset = get_bigendian_uint64(&base[0]); + entry->crc = 0; + entry->length = entry->offset >> 44; + entry->flags = MAP_ENTRY_FLAG_NO_CRC | ((entry->length == hunkbytes) ? V34_MAP_ENTRY_TYPE_UNCOMPRESSED : V34_MAP_ENTRY_TYPE_COMPRESSED); +#ifdef __MWERKS__ + entry->offset = entry->offset & 0x00000FFFFFFFFFFFLL; +#else + entry->offset = (entry->offset << 20) >> 20; +#endif +} + +/*************************************************************************** + CHD FILE MANAGEMENT +***************************************************************************/ + +/*------------------------------------------------- + chd_open_file - open a CHD file for access +-------------------------------------------------*/ + +CHD_EXPORT chd_error chd_open_file(core_file *file, int mode, chd_file *parent, chd_file **chd) +{ + chd_file *newchd = NULL; + chd_error err; + int intfnum; + + /* verify parameters */ + if (file == NULL) + EARLY_EXIT(err = CHDERR_INVALID_PARAMETER); + + /* punt if invalid parent */ + if (parent != NULL && parent->cookie != COOKIE_VALUE) + EARLY_EXIT(err = CHDERR_INVALID_PARAMETER); + + /* allocate memory for the final result */ + newchd = (chd_file *)malloc(sizeof(**chd)); + if (newchd == NULL) + EARLY_EXIT(err = CHDERR_OUT_OF_MEMORY); + memset(newchd, 0, sizeof(*newchd)); + newchd->cookie = COOKIE_VALUE; + newchd->parent = parent; + newchd->file = file; + + /* now attempt to read the header */ + err = header_read(newchd, &newchd->header); + if (err != CHDERR_NONE) + EARLY_EXIT(err); + + /* validate the header */ + err = header_validate(&newchd->header); + if (err != CHDERR_NONE) + EARLY_EXIT(err); + + /* make sure we don't open a read-only file writeable */ + if (mode == CHD_OPEN_READWRITE && !(newchd->header.flags & CHDFLAGS_IS_WRITEABLE)) + EARLY_EXIT(err = CHDERR_FILE_NOT_WRITEABLE); + + /* also, never open an older version writeable */ + if (mode == CHD_OPEN_READWRITE && newchd->header.version < CHD_HEADER_VERSION) + EARLY_EXIT(err = CHDERR_UNSUPPORTED_VERSION); + + /* if we need a parent, make sure we have one */ + if (parent == NULL && (newchd->header.flags & CHDFLAGS_HAS_PARENT)) + EARLY_EXIT(err = CHDERR_REQUIRES_PARENT); + + /* make sure we have a valid parent */ + if (parent != NULL) + { + /* check MD5 if it isn't empty */ + if (memcmp(nullmd5, newchd->header.parentmd5, sizeof(newchd->header.parentmd5)) != 0 && + memcmp(nullmd5, newchd->parent->header.md5, sizeof(newchd->parent->header.md5)) != 0 && + memcmp(newchd->parent->header.md5, newchd->header.parentmd5, sizeof(newchd->header.parentmd5)) != 0) + EARLY_EXIT(err = CHDERR_INVALID_PARENT); + + /* check SHA1 if it isn't empty */ + if (memcmp(nullsha1, newchd->header.parentsha1, sizeof(newchd->header.parentsha1)) != 0 && + memcmp(nullsha1, newchd->parent->header.sha1, sizeof(newchd->parent->header.sha1)) != 0 && + memcmp(newchd->parent->header.sha1, newchd->header.parentsha1, sizeof(newchd->header.parentsha1)) != 0) + EARLY_EXIT(err = CHDERR_INVALID_PARENT); + } + + /* now read the hunk map */ + if (newchd->header.version < 5) + { + err = map_read(newchd); + if (err != CHDERR_NONE) + EARLY_EXIT(err); + } + else + { + err = decompress_v5_map(newchd, &(newchd->header)); + } + if (err != CHDERR_NONE) + EARLY_EXIT(err); + +#ifdef NEED_CACHE_HUNK + /* allocate and init the hunk cache */ + newchd->cache = (UINT8 *)malloc(newchd->header.hunkbytes); + newchd->compare = (UINT8 *)malloc(newchd->header.hunkbytes); + if (newchd->cache == NULL || newchd->compare == NULL) + EARLY_EXIT(err = CHDERR_OUT_OF_MEMORY); + newchd->cachehunk = ~0; + newchd->comparehunk = ~0; +#endif + + /* allocate the temporary compressed buffer */ + newchd->compressed = (UINT8 *)malloc(newchd->header.hunkbytes); + if (newchd->compressed == NULL) + EARLY_EXIT(err = CHDERR_OUT_OF_MEMORY); + + /* find the codec interface */ + if (newchd->header.version < 5) + { + for (intfnum = 0; intfnum < ARRAY_LENGTH(codec_interfaces); intfnum++) + { + if (codec_interfaces[intfnum].compression == newchd->header.compression[0]) + { + newchd->codecintf[0] = &codec_interfaces[intfnum]; + break; + } + } + + if (intfnum == ARRAY_LENGTH(codec_interfaces)) + EARLY_EXIT(err = CHDERR_UNSUPPORTED_FORMAT); + + /* initialize the codec */ + if (newchd->codecintf[0]->init != NULL) + { + err = (*newchd->codecintf[0]->init)(&newchd->zlib_codec_data, newchd->header.hunkbytes); + if (err != CHDERR_NONE) + EARLY_EXIT(err); + } + } + else + { + int decompnum; + /* verify the compression types and initialize the codecs */ + for (decompnum = 0; decompnum < ARRAY_LENGTH(newchd->header.compression); decompnum++) + { + int i; + for (i = 0 ; i < ARRAY_LENGTH(codec_interfaces) ; i++) + { + if (codec_interfaces[i].compression == newchd->header.compression[decompnum]) + { + newchd->codecintf[decompnum] = &codec_interfaces[i]; + break; + } + } + + if (newchd->codecintf[decompnum] == NULL && newchd->header.compression[decompnum] != 0) + EARLY_EXIT(err = CHDERR_UNSUPPORTED_FORMAT); + + /* initialize the codec */ + if (newchd->codecintf[decompnum]->init != NULL) + { + void* codec = NULL; + switch (newchd->header.compression[decompnum]) + { + case CHD_CODEC_ZLIB: + codec = &newchd->zlib_codec_data; + break; + + case CHD_CODEC_CD_ZLIB: + codec = &newchd->cdzl_codec_data; + break; + + case CHD_CODEC_CD_LZMA: + codec = &newchd->cdlz_codec_data; + break; + + case CHD_CODEC_CD_FLAC: + codec = &newchd->cdfl_codec_data; + break; + } + + if (codec == NULL) + EARLY_EXIT(err = CHDERR_UNSUPPORTED_FORMAT); + + err = (*newchd->codecintf[decompnum]->init)(codec, newchd->header.hunkbytes); + if (err != CHDERR_NONE) + EARLY_EXIT(err); + } + } + } + + /* all done */ + *chd = newchd; + return CHDERR_NONE; + +cleanup: + if (newchd != NULL) + chd_close(newchd); + return err; +} + +/*------------------------------------------------- + chd_precache - precache underlying file in + memory +-------------------------------------------------*/ + +CHD_EXPORT chd_error chd_precache(chd_file *chd) +{ +#ifdef _MSC_VER + size_t size, count; +#else + ssize_t size, count; +#endif + + if (chd->file_cache == NULL) + { + core_fseek(chd->file, 0, SEEK_END); + size = core_ftell(chd->file); + if (size <= 0) + return CHDERR_INVALID_DATA; + chd->file_cache = malloc(size); + if (chd->file_cache == NULL) + return CHDERR_OUT_OF_MEMORY; + core_fseek(chd->file, 0, SEEK_SET); + count = core_fread(chd->file, chd->file_cache, size); + if (count != size) + { + free(chd->file_cache); + chd->file_cache = NULL; + return CHDERR_READ_ERROR; + } + } + + return CHDERR_NONE; +} + +/*------------------------------------------------- + chd_open - open a CHD file by + filename +-------------------------------------------------*/ + +CHD_EXPORT chd_error chd_open(const char *filename, int mode, chd_file *parent, chd_file **chd) +{ + chd_error err; + core_file *file = NULL; + + /* choose the proper mode */ + switch(mode) + { + case CHD_OPEN_READ: + break; + + default: + err = CHDERR_INVALID_PARAMETER; + goto cleanup; + } + + /* open the file */ + file = core_fopen(filename); + if (file == 0) + { + err = CHDERR_FILE_NOT_FOUND; + goto cleanup; + } + + /* now open the CHD */ + err = chd_open_file(file, mode, parent, chd); + if (err != CHDERR_NONE) + goto cleanup; + + /* we now own this file */ + (*chd)->owns_file = TRUE; + +cleanup: + if ((err != CHDERR_NONE) && (file != NULL)) + core_fclose(file); + return err; +} + +/*------------------------------------------------- + chd_close - close a CHD file for access +-------------------------------------------------*/ + +CHD_EXPORT void chd_close(chd_file *chd) +{ + /* punt if NULL or invalid */ + if (chd == NULL || chd->cookie != COOKIE_VALUE) + return; + + /* deinit the codec */ + if (chd->header.version < 5) + { + if (chd->codecintf[0] != NULL && chd->codecintf[0]->free != NULL) + (*chd->codecintf[0]->free)(&chd->zlib_codec_data); + } + else + { + int i; + /* Free the codecs */ + for (i = 0 ; i < ARRAY_LENGTH(chd->codecintf); i++) + { + void* codec = NULL; + + if (chd->codecintf[i] == NULL) + continue; + + switch (chd->codecintf[i]->compression) + { + case CHD_CODEC_CD_LZMA: + codec = &chd->cdlz_codec_data; + break; + + case CHD_CODEC_ZLIB: + codec = &chd->zlib_codec_data; + break; + + case CHD_CODEC_CD_ZLIB: + codec = &chd->cdzl_codec_data; + break; + + case CHD_CODEC_CD_FLAC: + codec = &chd->cdfl_codec_data; + break; + } + + if (codec) + { + (*chd->codecintf[i]->free)(codec); + } + } + + /* Free the raw map */ + if (chd->header.rawmap != NULL) + free(chd->header.rawmap); + } + + /* free the compressed data buffer */ + if (chd->compressed != NULL) + free(chd->compressed); + +#ifdef NEED_CACHE_HUNK + /* free the hunk cache and compare data */ + if (chd->compare != NULL) + free(chd->compare); + if (chd->cache != NULL) + free(chd->cache); +#endif + + /* free the hunk map */ + if (chd->map != NULL) + free(chd->map); + + /* close the file */ + if (chd->owns_file && chd->file != NULL) + core_fclose(chd->file); + +#ifdef NEED_CACHE_HUNK + if (PRINTF_MAX_HUNK) printf("Max hunk = %d/%d\n", chd->maxhunk, chd->header.totalhunks); +#endif + if (chd->file_cache) + free(chd->file_cache); + + /* free our memory */ + free(chd); +} + +/*------------------------------------------------- + chd_core_file - return the associated + core_file +-------------------------------------------------*/ + +CHD_EXPORT core_file *chd_core_file(chd_file *chd) +{ + return chd->file; +} + +/*------------------------------------------------- + chd_error_string - return an error string for + the given CHD error +-------------------------------------------------*/ + +CHD_EXPORT const char *chd_error_string(chd_error err) +{ + switch (err) + { + case CHDERR_NONE: return "no error"; + case CHDERR_NO_INTERFACE: return "no drive interface"; + case CHDERR_OUT_OF_MEMORY: return "out of memory"; + case CHDERR_INVALID_FILE: return "invalid file"; + case CHDERR_INVALID_PARAMETER: return "invalid parameter"; + case CHDERR_INVALID_DATA: return "invalid data"; + case CHDERR_FILE_NOT_FOUND: return "file not found"; + case CHDERR_REQUIRES_PARENT: return "requires parent"; + case CHDERR_FILE_NOT_WRITEABLE: return "file not writeable"; + case CHDERR_READ_ERROR: return "read error"; + case CHDERR_WRITE_ERROR: return "write error"; + case CHDERR_CODEC_ERROR: return "codec error"; + case CHDERR_INVALID_PARENT: return "invalid parent"; + case CHDERR_HUNK_OUT_OF_RANGE: return "hunk out of range"; + case CHDERR_DECOMPRESSION_ERROR: return "decompression error"; + case CHDERR_COMPRESSION_ERROR: return "compression error"; + case CHDERR_CANT_CREATE_FILE: return "can't create file"; + case CHDERR_CANT_VERIFY: return "can't verify file"; + case CHDERR_NOT_SUPPORTED: return "operation not supported"; + case CHDERR_METADATA_NOT_FOUND: return "can't find metadata"; + case CHDERR_INVALID_METADATA_SIZE: return "invalid metadata size"; + case CHDERR_UNSUPPORTED_VERSION: return "unsupported CHD version"; + case CHDERR_VERIFY_INCOMPLETE: return "incomplete verify"; + case CHDERR_INVALID_METADATA: return "invalid metadata"; + case CHDERR_INVALID_STATE: return "invalid state"; + case CHDERR_OPERATION_PENDING: return "operation pending"; + case CHDERR_NO_ASYNC_OPERATION: return "no async operation in progress"; + case CHDERR_UNSUPPORTED_FORMAT: return "unsupported format"; + default: return "undocumented error"; + } +} + +/*************************************************************************** + CHD HEADER MANAGEMENT +***************************************************************************/ + +/*------------------------------------------------- + chd_get_header - return a pointer to the + extracted header data +-------------------------------------------------*/ + +CHD_EXPORT const chd_header *chd_get_header(chd_file *chd) +{ + /* punt if NULL or invalid */ + if (chd == NULL || chd->cookie != COOKIE_VALUE) + return NULL; + + return &chd->header; +} + +/*************************************************************************** + CORE DATA READ/WRITE +***************************************************************************/ + +/*------------------------------------------------- + chd_read - read a single hunk from the CHD + file +-------------------------------------------------*/ + +CHD_EXPORT chd_error chd_read(chd_file *chd, UINT32 hunknum, void *buffer) +{ + /* punt if NULL or invalid */ + if (chd == NULL || chd->cookie != COOKIE_VALUE) + return CHDERR_INVALID_PARAMETER; + + /* if we're past the end, fail */ + if (hunknum >= chd->header.totalhunks) + return CHDERR_HUNK_OUT_OF_RANGE; + + /* perform the read */ + return hunk_read_into_memory(chd, hunknum, (UINT8 *)buffer); +} + +/*************************************************************************** + METADATA MANAGEMENT +***************************************************************************/ + +/*------------------------------------------------- + chd_get_metadata - get the indexed metadata + of the given type +-------------------------------------------------*/ + +CHD_EXPORT chd_error chd_get_metadata(chd_file *chd, UINT32 searchtag, UINT32 searchindex, void *output, UINT32 outputlen, UINT32 *resultlen, UINT32 *resulttag, UINT8 *resultflags) +{ + metadata_entry metaentry; + chd_error err; + UINT32 count; + + /* if we didn't find it, just return */ + err = metadata_find_entry(chd, searchtag, searchindex, &metaentry); + if (err != CHDERR_NONE) + { + /* unless we're an old version and they are requesting hard disk metadata */ + if (chd->header.version < 3 && (searchtag == HARD_DISK_METADATA_TAG || searchtag == CHDMETATAG_WILDCARD) && searchindex == 0) + { + char faux_metadata[256]; + UINT32 faux_length; + + /* fill in the faux metadata */ + sprintf(faux_metadata, HARD_DISK_METADATA_FORMAT, chd->header.obsolete_cylinders, chd->header.obsolete_heads, chd->header.obsolete_sectors, chd->header.hunkbytes / chd->header.obsolete_hunksize); + faux_length = (UINT32)strlen(faux_metadata) + 1; + + /* copy the metadata itself */ + memcpy(output, faux_metadata, MIN(outputlen, faux_length)); + + /* return the length of the data and the tag */ + if (resultlen != NULL) + *resultlen = faux_length; + if (resulttag != NULL) + *resulttag = HARD_DISK_METADATA_TAG; + return CHDERR_NONE; + } + return err; + } + + /* read the metadata */ + outputlen = MIN(outputlen, metaentry.length); + core_fseek(chd->file, metaentry.offset + METADATA_HEADER_SIZE, SEEK_SET); + count = core_fread(chd->file, output, outputlen); + if (count != outputlen) + return CHDERR_READ_ERROR; + + /* return the length of the data and the tag */ + if (resultlen != NULL) + *resultlen = metaentry.length; + if (resulttag != NULL) + *resulttag = metaentry.metatag; + if (resultflags != NULL) + *resultflags = metaentry.flags; + return CHDERR_NONE; +} + +/*************************************************************************** + CODEC INTERFACES +***************************************************************************/ + +/*------------------------------------------------- + chd_codec_config - set internal codec + parameters +-------------------------------------------------*/ + +CHD_EXPORT chd_error chd_codec_config(chd_file *chd, int param, void *config) +{ + return CHDERR_INVALID_PARAMETER; +} + +/*------------------------------------------------- + chd_get_codec_name - get the name of a + particular codec +-------------------------------------------------*/ + +CHD_EXPORT const char *chd_get_codec_name(UINT32 codec) +{ + return "Unknown"; +} + +/*************************************************************************** + INTERNAL HEADER OPERATIONS +***************************************************************************/ + +/*------------------------------------------------- + header_validate - check the validity of a + CHD header +-------------------------------------------------*/ + +static chd_error header_validate(const chd_header *header) +{ + int intfnum; + + /* require a valid version */ + if (header->version == 0 || header->version > CHD_HEADER_VERSION) + return CHDERR_UNSUPPORTED_VERSION; + + /* require a valid length */ + if ((header->version == 1 && header->length != CHD_V1_HEADER_SIZE) || + (header->version == 2 && header->length != CHD_V2_HEADER_SIZE) || + (header->version == 3 && header->length != CHD_V3_HEADER_SIZE) || + (header->version == 4 && header->length != CHD_V4_HEADER_SIZE) || + (header->version == 5 && header->length != CHD_V5_HEADER_SIZE)) + return CHDERR_INVALID_PARAMETER; + + /* Do not validate v5 header */ + if (header->version <= 4) + { + /* require valid flags */ + if (header->flags & CHDFLAGS_UNDEFINED) + return CHDERR_INVALID_PARAMETER; + + /* require a supported compression mechanism */ + for (intfnum = 0; intfnum < ARRAY_LENGTH(codec_interfaces); intfnum++) + if (codec_interfaces[intfnum].compression == header->compression[0]) + break; + + if (intfnum == ARRAY_LENGTH(codec_interfaces)) + return CHDERR_INVALID_PARAMETER; + + /* require a valid hunksize */ + if (header->hunkbytes == 0 || header->hunkbytes >= 65536 * 256) + return CHDERR_INVALID_PARAMETER; + + /* require a valid hunk count */ + if (header->totalhunks == 0) + return CHDERR_INVALID_PARAMETER; + + /* require a valid MD5 and/or SHA1 if we're using a parent */ + if ((header->flags & CHDFLAGS_HAS_PARENT) && memcmp(header->parentmd5, nullmd5, sizeof(nullmd5)) == 0 && memcmp(header->parentsha1, nullsha1, sizeof(nullsha1)) == 0) + return CHDERR_INVALID_PARAMETER; + + /* if we're V3 or later, the obsolete fields must be 0 */ + if (header->version >= 3 && + (header->obsolete_cylinders != 0 || header->obsolete_sectors != 0 || + header->obsolete_heads != 0 || header->obsolete_hunksize != 0)) + return CHDERR_INVALID_PARAMETER; + + /* if we're pre-V3, the obsolete fields must NOT be 0 */ + if (header->version < 3 && + (header->obsolete_cylinders == 0 || header->obsolete_sectors == 0 || + header->obsolete_heads == 0 || header->obsolete_hunksize == 0)) + return CHDERR_INVALID_PARAMETER; + } + + return CHDERR_NONE; +} + +/*------------------------------------------------- + header_guess_unitbytes - for older CHD formats, + guess at the bytes/unit based on metadata +-------------------------------------------------*/ + +static UINT32 header_guess_unitbytes(chd_file *chd) +{ + /* look for hard disk metadata; if found, then the unit size == sector size */ + char metadata[512]; + int i0, i1, i2, i3; + if (chd_get_metadata(chd, HARD_DISK_METADATA_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE && + sscanf(metadata, HARD_DISK_METADATA_FORMAT, &i0, &i1, &i2, &i3) == 4) + return i3; + + /* look for CD-ROM metadata; if found, then the unit size == CD frame size */ + if (chd_get_metadata(chd, CDROM_OLD_METADATA_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE || + chd_get_metadata(chd, CDROM_TRACK_METADATA_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE || + chd_get_metadata(chd, CDROM_TRACK_METADATA2_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE || + chd_get_metadata(chd, GDROM_OLD_METADATA_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE || + chd_get_metadata(chd, GDROM_TRACK_METADATA_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE) + return CD_FRAME_SIZE; + + /* otherwise, just map 1:1 with the hunk size */ + return chd->header.hunkbytes; +} + +/*------------------------------------------------- + header_read - read a CHD header into the + internal data structure +-------------------------------------------------*/ + +static chd_error header_read(chd_file *chd, chd_header *header) +{ + UINT8 rawheader[CHD_MAX_HEADER_SIZE]; + UINT32 count; + + /* punt if NULL */ + if (header == NULL) + return CHDERR_INVALID_PARAMETER; + + /* punt if invalid file */ + if (chd->file == NULL) + return CHDERR_INVALID_FILE; + + /* seek and read */ + core_fseek(chd->file, 0, SEEK_SET); + count = core_fread(chd->file, rawheader, sizeof(rawheader)); + if (count != sizeof(rawheader)) + return CHDERR_READ_ERROR; + + /* verify the tag */ + if (strncmp((char *)rawheader, "MComprHD", 8) != 0) + return CHDERR_INVALID_DATA; + + /* extract the direct data */ + memset(header, 0, sizeof(*header)); + header->length = get_bigendian_uint32(&rawheader[8]); + header->version = get_bigendian_uint32(&rawheader[12]); + + /* make sure it's a version we understand */ + if (header->version == 0 || header->version > CHD_HEADER_VERSION) + return CHDERR_UNSUPPORTED_VERSION; + + /* make sure the length is expected */ + if ((header->version == 1 && header->length != CHD_V1_HEADER_SIZE) || + (header->version == 2 && header->length != CHD_V2_HEADER_SIZE) || + (header->version == 3 && header->length != CHD_V3_HEADER_SIZE) || + (header->version == 4 && header->length != CHD_V4_HEADER_SIZE) || + (header->version == 5 && header->length != CHD_V5_HEADER_SIZE)) + + return CHDERR_INVALID_DATA; + + /* extract the common data */ + header->flags = get_bigendian_uint32(&rawheader[16]); + header->compression[0] = get_bigendian_uint32(&rawheader[20]); + header->compression[1] = CHD_CODEC_NONE; + header->compression[2] = CHD_CODEC_NONE; + header->compression[3] = CHD_CODEC_NONE; + + /* extract the V1/V2-specific data */ + if (header->version < 3) + { + int seclen = (header->version == 1) ? CHD_V1_SECTOR_SIZE : get_bigendian_uint32(&rawheader[76]); + header->obsolete_hunksize = get_bigendian_uint32(&rawheader[24]); + header->totalhunks = get_bigendian_uint32(&rawheader[28]); + header->obsolete_cylinders = get_bigendian_uint32(&rawheader[32]); + header->obsolete_heads = get_bigendian_uint32(&rawheader[36]); + header->obsolete_sectors = get_bigendian_uint32(&rawheader[40]); + memcpy(header->md5, &rawheader[44], CHD_MD5_BYTES); + memcpy(header->parentmd5, &rawheader[60], CHD_MD5_BYTES); + header->logicalbytes = (UINT64)header->obsolete_cylinders * (UINT64)header->obsolete_heads * (UINT64)header->obsolete_sectors * (UINT64)seclen; + header->hunkbytes = seclen * header->obsolete_hunksize; + header->unitbytes = header_guess_unitbytes(chd); + header->unitcount = (header->logicalbytes + header->unitbytes - 1) / header->unitbytes; + header->metaoffset = 0; + } + + /* extract the V3-specific data */ + else if (header->version == 3) + { + header->totalhunks = get_bigendian_uint32(&rawheader[24]); + header->logicalbytes = get_bigendian_uint64(&rawheader[28]); + header->metaoffset = get_bigendian_uint64(&rawheader[36]); + memcpy(header->md5, &rawheader[44], CHD_MD5_BYTES); + memcpy(header->parentmd5, &rawheader[60], CHD_MD5_BYTES); + header->hunkbytes = get_bigendian_uint32(&rawheader[76]); + header->unitbytes = header_guess_unitbytes(chd); + header->unitcount = (header->logicalbytes + header->unitbytes - 1) / header->unitbytes; + memcpy(header->sha1, &rawheader[80], CHD_SHA1_BYTES); + memcpy(header->parentsha1, &rawheader[100], CHD_SHA1_BYTES); + } + + /* extract the V4-specific data */ + else if (header->version == 4) + { + header->totalhunks = get_bigendian_uint32(&rawheader[24]); + header->logicalbytes = get_bigendian_uint64(&rawheader[28]); + header->metaoffset = get_bigendian_uint64(&rawheader[36]); + header->hunkbytes = get_bigendian_uint32(&rawheader[44]); + header->unitbytes = header_guess_unitbytes(chd); + header->unitcount = (header->logicalbytes + header->unitbytes - 1) / header->unitbytes; + memcpy(header->sha1, &rawheader[48], CHD_SHA1_BYTES); + memcpy(header->parentsha1, &rawheader[68], CHD_SHA1_BYTES); + memcpy(header->rawsha1, &rawheader[88], CHD_SHA1_BYTES); + } + + /* extract the V5-specific data */ + else if (header->version == 5) + { + /* TODO */ + header->compression[0] = get_bigendian_uint32(&rawheader[16]); + header->compression[1] = get_bigendian_uint32(&rawheader[20]); + header->compression[2] = get_bigendian_uint32(&rawheader[24]); + header->compression[3] = get_bigendian_uint32(&rawheader[28]); + header->logicalbytes = get_bigendian_uint64(&rawheader[32]); + header->mapoffset = get_bigendian_uint64(&rawheader[40]); + header->metaoffset = get_bigendian_uint64(&rawheader[48]); + header->hunkbytes = get_bigendian_uint32(&rawheader[56]); + header->hunkcount = (header->logicalbytes + header->hunkbytes - 1) / header->hunkbytes; + header->unitbytes = get_bigendian_uint32(&rawheader[60]); + header->unitcount = (header->logicalbytes + header->unitbytes - 1) / header->unitbytes; + memcpy(header->sha1, &rawheader[84], CHD_SHA1_BYTES); + memcpy(header->parentsha1, &rawheader[104], CHD_SHA1_BYTES); + memcpy(header->rawsha1, &rawheader[64], CHD_SHA1_BYTES); + + /* determine properties of map entries */ + header->mapentrybytes = chd_compressed(header) ? 12 : 4; + + /* hack */ + header->totalhunks = header->hunkcount; + } + + /* Unknown version */ + else + { + /* TODO */ + } + + /* guess it worked */ + return CHDERR_NONE; +} + +/*************************************************************************** + INTERNAL HUNK READ/WRITE +***************************************************************************/ + +/*------------------------------------------------- + hunk_read_compressed - read a compressed + hunk +-------------------------------------------------*/ + +static UINT8* hunk_read_compressed(chd_file *chd, UINT64 offset, size_t size) +{ +#ifdef _MSC_VER + size_t bytes; +#else + ssize_t bytes; +#endif + if (chd->file_cache != NULL) + { + return chd->file_cache + offset; + } + else + { + core_fseek(chd->file, offset, SEEK_SET); + bytes = core_fread(chd->file, chd->compressed, size); + if (bytes != size) + return NULL; + return chd->compressed; + } +} + +/*------------------------------------------------- + hunk_read_uncompressed - read an uncompressed + hunk +-------------------------------------------------*/ + +static chd_error hunk_read_uncompressed(chd_file *chd, UINT64 offset, size_t size, UINT8 *dest) +{ +#ifdef _MSC_VER + size_t bytes; +#else + ssize_t bytes; +#endif + if (chd->file_cache != NULL) + { + memcpy(dest, chd->file_cache + offset, size); + } + else + { + core_fseek(chd->file, offset, SEEK_SET); + bytes = core_fread(chd->file, dest, size); + if (bytes != size) + return CHDERR_READ_ERROR; + } + return CHDERR_NONE; +} + +#ifdef NEED_CACHE_HUNK +/*------------------------------------------------- + hunk_read_into_cache - read a hunk into + the CHD's hunk cache +-------------------------------------------------*/ + +static chd_error hunk_read_into_cache(chd_file *chd, UINT32 hunknum) +{ + chd_error err; + + /* track the max */ + if (hunknum > chd->maxhunk) + chd->maxhunk = hunknum; + + /* if we're already in the cache, we're done */ + if (chd->cachehunk == hunknum) + return CHDERR_NONE; + chd->cachehunk = ~0; + + /* otherwise, read the data */ + err = hunk_read_into_memory(chd, hunknum, chd->cache); + if (err != CHDERR_NONE) + return err; + + /* mark the hunk successfully cached in */ + chd->cachehunk = hunknum; + return CHDERR_NONE; +} +#endif + +/*------------------------------------------------- + hunk_read_into_memory - read a hunk into + memory at the given location +-------------------------------------------------*/ + +static chd_error hunk_read_into_memory(chd_file *chd, UINT32 hunknum, UINT8 *dest) +{ + chd_error err; + + /* punt if no file */ + if (chd->file == NULL) + return CHDERR_INVALID_FILE; + + /* return an error if out of range */ + if (hunknum >= chd->header.totalhunks) + return CHDERR_HUNK_OUT_OF_RANGE; + + if (dest == NULL) + return CHDERR_INVALID_PARAMETER; + + if (chd->header.version < 5) + { + map_entry *entry = &chd->map[hunknum]; + UINT32 bytes; + UINT8* compressed_bytes; + + /* switch off the entry type */ + switch (entry->flags & MAP_ENTRY_FLAG_TYPE_MASK) + { + /* compressed data */ + case V34_MAP_ENTRY_TYPE_COMPRESSED: + { + void *codec = NULL; + + /* read it into the decompression buffer */ + compressed_bytes = hunk_read_compressed(chd, entry->offset, entry->length); + if (compressed_bytes == NULL) + return CHDERR_READ_ERROR; + + /* now decompress using the codec */ + err = CHDERR_NONE; + codec = &chd->zlib_codec_data; + if (chd->codecintf[0]->decompress != NULL) + err = (*chd->codecintf[0]->decompress)(codec, compressed_bytes, entry->length, dest, chd->header.hunkbytes); + if (err != CHDERR_NONE) + return err; + break; + } + + /* uncompressed data */ + case V34_MAP_ENTRY_TYPE_UNCOMPRESSED: + err = hunk_read_uncompressed(chd, entry->offset, chd->header.hunkbytes, dest); + if (err != CHDERR_NONE) + return err; + break; + + /* mini-compressed data */ + case V34_MAP_ENTRY_TYPE_MINI: + put_bigendian_uint64(&dest[0], entry->offset); + for (bytes = 8; bytes < chd->header.hunkbytes; bytes++) + dest[bytes] = dest[bytes - 8]; + break; + + /* self-referenced data */ + case V34_MAP_ENTRY_TYPE_SELF_HUNK: +#ifdef NEED_CACHE_HUNK + if (chd->cachehunk == entry->offset && dest == chd->cache) + break; +#endif + return hunk_read_into_memory(chd, entry->offset, dest); + + /* parent-referenced data */ + case V34_MAP_ENTRY_TYPE_PARENT_HUNK: + err = hunk_read_into_memory(chd->parent, entry->offset, dest); + if (err != CHDERR_NONE) + return err; + break; + } + return CHDERR_NONE; + } + else + { + void* codec = NULL; + /* get a pointer to the map entry */ + uint64_t blockoffs; + uint32_t blocklen; +#ifdef VERIFY_BLOCK_CRC + uint16_t blockcrc; +#endif + uint8_t *rawmap = &chd->header.rawmap[chd->header.mapentrybytes * hunknum]; + UINT8* compressed_bytes; + + /* uncompressed case */ + if (!chd_compressed(&chd->header)) + { + blockoffs = (uint64_t)get_bigendian_uint32(rawmap) * (uint64_t)chd->header.hunkbytes; + if (blockoffs != 0) { + core_fseek(chd->file, blockoffs, SEEK_SET); + int result = core_fread(chd->file, dest, chd->header.hunkbytes); + /* TODO + else if (m_parent_missing) + throw CHDERR_REQUIRES_PARENT; */ + } else if (chd->parent) { + err = hunk_read_into_memory(chd->parent, hunknum, dest); + if (err != CHDERR_NONE) + return err; + } else { + memset(dest, 0, chd->header.hunkbytes); + } + + return CHDERR_NONE; + } + + /* compressed case */ + blocklen = get_bigendian_uint24(&rawmap[1]); + blockoffs = get_bigendian_uint48(&rawmap[4]); +#ifdef VERIFY_BLOCK_CRC + blockcrc = get_bigendian_uint16(&rawmap[10]); +#endif + codec = NULL; + switch (rawmap[0]) + { + case COMPRESSION_TYPE_0: + case COMPRESSION_TYPE_1: + case COMPRESSION_TYPE_2: + case COMPRESSION_TYPE_3: + compressed_bytes = hunk_read_compressed(chd, blockoffs, blocklen); + if (compressed_bytes == NULL) + return CHDERR_READ_ERROR; + switch (chd->codecintf[rawmap[0]]->compression) + { + case CHD_CODEC_CD_LZMA: + codec = &chd->cdlz_codec_data; + break; + + case CHD_CODEC_ZLIB: + codec = &chd->zlib_codec_data; + break; + + case CHD_CODEC_CD_ZLIB: + codec = &chd->cdzl_codec_data; + break; + + case CHD_CODEC_CD_FLAC: + codec = &chd->cdfl_codec_data; + break; + } + if (codec==NULL) + return CHDERR_CODEC_ERROR; + err = chd->codecintf[rawmap[0]]->decompress(codec, compressed_bytes, blocklen, dest, chd->header.hunkbytes); + if (err != CHDERR_NONE) + return err; +#ifdef VERIFY_BLOCK_CRC + if (crc16(dest, chd->header.hunkbytes) != blockcrc) + return CHDERR_DECOMPRESSION_ERROR; +#endif + return CHDERR_NONE; + + case COMPRESSION_NONE: + err = hunk_read_uncompressed(chd, blockoffs, blocklen, dest); + if (err != CHDERR_NONE) + return err; +#ifdef VERIFY_BLOCK_CRC + if (crc16(dest, chd->header.hunkbytes) != blockcrc) + return CHDERR_DECOMPRESSION_ERROR; +#endif + return CHDERR_NONE; + + case COMPRESSION_SELF: + return hunk_read_into_memory(chd, blockoffs, dest); + + case COMPRESSION_PARENT: +#if 0 + /* TODO */ + if (m_parent_missing) + return CHDERR_REQUIRES_PARENT; + return m_parent->read_bytes(uint64_t(blockoffs) * uint64_t(m_parent->unit_bytes()), dest, m_hunkbytes); +#endif + return CHDERR_DECOMPRESSION_ERROR; + } + return CHDERR_NONE; + } + + /* We should not reach this code */ + return CHDERR_DECOMPRESSION_ERROR; +} + +/*************************************************************************** + INTERNAL MAP ACCESS +***************************************************************************/ + +/*------------------------------------------------- + map_read - read the initial sector map +-------------------------------------------------*/ + +static chd_error map_read(chd_file *chd) +{ + UINT32 entrysize = (chd->header.version < 3) ? OLD_MAP_ENTRY_SIZE : MAP_ENTRY_SIZE; + UINT8 raw_map_entries[MAP_STACK_ENTRIES * MAP_ENTRY_SIZE]; + UINT64 fileoffset, maxoffset = 0; + UINT8 cookie[MAP_ENTRY_SIZE]; + UINT32 count; + chd_error err; + int i; + + /* first allocate memory */ + chd->map = (map_entry *)malloc(sizeof(chd->map[0]) * chd->header.totalhunks); + if (!chd->map) + return CHDERR_OUT_OF_MEMORY; + + /* read the map entries in in chunks and extract to the map list */ + fileoffset = chd->header.length; + for (i = 0; i < chd->header.totalhunks; i += MAP_STACK_ENTRIES) + { + /* compute how many entries this time */ + int entries = chd->header.totalhunks - i, j; + if (entries > MAP_STACK_ENTRIES) + entries = MAP_STACK_ENTRIES; + + /* read that many */ + core_fseek(chd->file, fileoffset, SEEK_SET); + count = core_fread(chd->file, raw_map_entries, entries * entrysize); + if (count != entries * entrysize) + { + err = CHDERR_READ_ERROR; + goto cleanup; + } + fileoffset += entries * entrysize; + + /* process that many */ + if (entrysize == MAP_ENTRY_SIZE) + { + for (j = 0; j < entries; j++) + map_extract(&raw_map_entries[j * MAP_ENTRY_SIZE], &chd->map[i + j]); + } + else + { + for (j = 0; j < entries; j++) + map_extract_old(&raw_map_entries[j * OLD_MAP_ENTRY_SIZE], &chd->map[i + j], chd->header.hunkbytes); + } + + /* track the maximum offset */ + for (j = 0; j < entries; j++) + if ((chd->map[i + j].flags & MAP_ENTRY_FLAG_TYPE_MASK) == V34_MAP_ENTRY_TYPE_COMPRESSED || + (chd->map[i + j].flags & MAP_ENTRY_FLAG_TYPE_MASK) == V34_MAP_ENTRY_TYPE_UNCOMPRESSED) + maxoffset = MAX(maxoffset, chd->map[i + j].offset + chd->map[i + j].length); + } + + /* verify the cookie */ + core_fseek(chd->file, fileoffset, SEEK_SET); + count = core_fread(chd->file, &cookie, entrysize); + if (count != entrysize || memcmp(&cookie, END_OF_LIST_COOKIE, entrysize)) + { + err = CHDERR_INVALID_FILE; + goto cleanup; + } + + /* verify the length */ + if (maxoffset > core_fsize(chd->file)) + { + err = CHDERR_INVALID_FILE; + goto cleanup; + } + return CHDERR_NONE; + +cleanup: + if (chd->map) + free(chd->map); + chd->map = NULL; + return err; +} + +/*************************************************************************** + INTERNAL METADATA ACCESS +***************************************************************************/ + +/*------------------------------------------------- + metadata_find_entry - find a metadata entry +-------------------------------------------------*/ + +static chd_error metadata_find_entry(chd_file *chd, UINT32 metatag, UINT32 metaindex, metadata_entry *metaentry) +{ + /* start at the beginning */ + metaentry->offset = chd->header.metaoffset; + metaentry->prev = 0; + + /* loop until we run out of options */ + while (metaentry->offset != 0) + { + UINT8 raw_meta_header[METADATA_HEADER_SIZE]; + UINT32 count; + + /* read the raw header */ + core_fseek(chd->file, metaentry->offset, SEEK_SET); + count = core_fread(chd->file, raw_meta_header, sizeof(raw_meta_header)); + if (count != sizeof(raw_meta_header)) + break; + + /* extract the data */ + metaentry->metatag = get_bigendian_uint32(&raw_meta_header[0]); + metaentry->length = get_bigendian_uint32(&raw_meta_header[4]); + metaentry->next = get_bigendian_uint64(&raw_meta_header[8]); + + /* flags are encoded in the high byte of length */ + metaentry->flags = metaentry->length >> 24; + metaentry->length &= 0x00ffffff; + + /* if we got a match, proceed */ + if (metatag == CHDMETATAG_WILDCARD || metaentry->metatag == metatag) + if (metaindex-- == 0) + return CHDERR_NONE; + + /* no match, fetch the next link */ + metaentry->prev = metaentry->offset; + metaentry->offset = metaentry->next; + } + + /* if we get here, we didn't find it */ + return CHDERR_METADATA_NOT_FOUND; +} + +/*************************************************************************** + ZLIB COMPRESSION CODEC +***************************************************************************/ + +/*------------------------------------------------- + zlib_codec_init - initialize the ZLIB codec +-------------------------------------------------*/ + +static chd_error zlib_codec_init(void *codec, uint32_t hunkbytes) +{ + int zerr; + chd_error err; + zlib_codec_data *data = (zlib_codec_data*)codec; + + /* clear the buffers */ + memset(data, 0, sizeof(zlib_codec_data)); + + /* init the inflater first */ + data->inflater.next_in = (Bytef *)data; /* bogus, but that's ok */ + data->inflater.avail_in = 0; + data->inflater.zalloc = zlib_fast_alloc; + data->inflater.zfree = zlib_fast_free; + data->inflater.opaque = &data->allocator; + zerr = inflateInit2(&data->inflater, -MAX_WBITS); + + /* convert errors */ + if (zerr == Z_MEM_ERROR) + err = CHDERR_OUT_OF_MEMORY; + else if (zerr != Z_OK) + err = CHDERR_CODEC_ERROR; + else + err = CHDERR_NONE; + + /* handle an error */ + if (err != CHDERR_NONE) + free(data); + + return err; +} + +/*------------------------------------------------- + zlib_codec_free - free data for the ZLIB + codec +-------------------------------------------------*/ + +static void zlib_codec_free(void *codec) +{ + zlib_codec_data *data = (zlib_codec_data *)codec; + + /* deinit the streams */ + if (data != NULL) + { + int i; + + inflateEnd(&data->inflater); + + /* free our fast memory */ + zlib_allocator_free(&data->allocator); + } +} + +/*------------------------------------------------- + zlib_codec_decompress - decompress data using + the ZLIB codec +-------------------------------------------------*/ + +static chd_error zlib_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen) +{ + zlib_codec_data *data = (zlib_codec_data *)codec; + int zerr; + + /* reset the decompressor */ + data->inflater.next_in = (Bytef *)src; + data->inflater.avail_in = complen; + data->inflater.total_in = 0; + data->inflater.next_out = (Bytef *)dest; + data->inflater.avail_out = destlen; + data->inflater.total_out = 0; + zerr = inflateReset(&data->inflater); + if (zerr != Z_OK) + return CHDERR_DECOMPRESSION_ERROR; + + /* do it */ + zerr = inflate(&data->inflater, Z_FINISH); + if (data->inflater.total_out != destlen) + return CHDERR_DECOMPRESSION_ERROR; + + return CHDERR_NONE; +} + +/*------------------------------------------------- + zlib_fast_alloc - fast malloc for ZLIB, which + allocates and frees memory frequently +-------------------------------------------------*/ + +/* Huge alignment values for possible SIMD optimization by compiler (NEON, SSE, AVX) */ +#define ZLIB_MIN_ALIGNMENT_BITS 512 +#define ZLIB_MIN_ALIGNMENT_BYTES (ZLIB_MIN_ALIGNMENT_BITS / 8) + +static voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size) +{ + zlib_allocator *alloc = (zlib_allocator *)opaque; + uintptr_t paddr = 0; + UINT32 *ptr; + int i; + + /* compute the size, rounding to the nearest 1k */ + size = (size * items + 0x3ff) & ~0x3ff; + + /* reuse a hunk if we can */ + for (i = 0; i < MAX_ZLIB_ALLOCS; i++) + { + ptr = alloc->allocptr[i]; + if (ptr && size == *ptr) + { + /* set the low bit of the size so we don't match next time */ + *ptr |= 1; + + /* return aligned block address */ + return (voidpf)(alloc->allocptr2[i]); + } + } + + /* alloc a new one */ + ptr = (UINT32 *)malloc(size + sizeof(UINT32) + ZLIB_MIN_ALIGNMENT_BYTES); + if (!ptr) + return NULL; + + /* put it into the list */ + for (i = 0; i < MAX_ZLIB_ALLOCS; i++) + if (!alloc->allocptr[i]) + { + alloc->allocptr[i] = ptr; + paddr = (((uintptr_t)ptr) + sizeof(UINT32) + (ZLIB_MIN_ALIGNMENT_BYTES-1)) & (~(ZLIB_MIN_ALIGNMENT_BYTES-1)); + alloc->allocptr2[i] = (uint32_t*)paddr; + break; + } + + /* set the low bit of the size so we don't match next time */ + *ptr = size | 1; + + /* return aligned block address */ + return (voidpf)paddr; +} + +/*------------------------------------------------- + zlib_fast_free - fast free for ZLIB, which + allocates and frees memory frequently +-------------------------------------------------*/ + +static void zlib_fast_free(voidpf opaque, voidpf address) +{ + zlib_allocator *alloc = (zlib_allocator *)opaque; + UINT32 *ptr = (UINT32 *)address; + int i; + + /* find the hunk */ + for (i = 0; i < MAX_ZLIB_ALLOCS; i++) + if (ptr == alloc->allocptr2[i]) + { + /* clear the low bit of the size to allow matches */ + *(alloc->allocptr[i]) &= ~1; + return; + } +} + +/*------------------------------------------------- + zlib_allocator_free +-------------------------------------------------*/ +static void zlib_allocator_free(voidpf opaque) +{ + zlib_allocator *alloc = (zlib_allocator *)opaque; + int i; + + for (i = 0; i < MAX_ZLIB_ALLOCS; i++) + if (alloc->allocptr[i]) + free(alloc->allocptr[i]); +} diff --git a/cores/saturn/deps/libchdr/src/libchdr_flac.c b/cores/saturn/deps/libchdr/src/libchdr_flac.c new file mode 100644 index 000000000..54d374f79 --- /dev/null +++ b/cores/saturn/deps/libchdr/src/libchdr_flac.c @@ -0,0 +1,302 @@ +/* license:BSD-3-Clause + * copyright-holders:Aaron Giles +*************************************************************************** + + flac.c + + FLAC compression wrappers + +***************************************************************************/ + +#include +#include + +#include +#define DR_FLAC_IMPLEMENTATION +#include + +/*************************************************************************** + * FLAC DECODER + *************************************************************************** + */ + +static size_t flac_decoder_read_callback(void *userdata, void *buffer, size_t bytes); +static drflac_bool32 flac_decoder_seek_callback(void *userdata, int offset, drflac_seek_origin origin); +static void flac_decoder_metadata_callback(void *userdata, drflac_metadata *metadata); +static void flac_decoder_write_callback(void *userdata, void *buffer, size_t len); + + +/* getters (valid after reset) */ +static uint32_t sample_rate(flac_decoder *decoder) { return decoder->sample_rate; } +static uint8_t channels(flac_decoder *decoder) { return decoder->channels; } +static uint8_t bits_per_sample(flac_decoder *decoder) { return decoder->bits_per_sample; } + +/*------------------------------------------------- + * flac_decoder - constructor + *------------------------------------------------- + */ + +int flac_decoder_init(flac_decoder *decoder) +{ + decoder->decoder = NULL; + decoder->sample_rate = 0; + decoder->channels = 0; + decoder->bits_per_sample = 0; + decoder->compressed_offset = 0; + decoder->compressed_start = NULL; + decoder->compressed_length = 0; + decoder->compressed2_start = NULL; + decoder->compressed2_length = 0; + decoder->uncompressed_offset = 0; + decoder->uncompressed_length = 0; + decoder->uncompressed_swap = 0; + return 0; +} + +/*------------------------------------------------- + * flac_decoder - destructor + *------------------------------------------------- + */ + +void flac_decoder_free(flac_decoder* decoder) +{ + if ((decoder != NULL) && (decoder->decoder != NULL)) + drflac_close(decoder->decoder); + decoder->decoder = NULL; +} + +/*------------------------------------------------- + * reset - reset state with the original + * parameters + *------------------------------------------------- + */ + +static int flac_decoder_internal_reset(flac_decoder* decoder) +{ + decoder->compressed_offset = 0; + flac_decoder_free(decoder); + decoder->decoder = drflac_open_with_metadata( + flac_decoder_read_callback, flac_decoder_seek_callback, + flac_decoder_metadata_callback, decoder, NULL); + return (decoder->decoder != NULL); +} + +/*------------------------------------------------- + * reset - reset state with new memory parameters + * and a custom-generated header + *------------------------------------------------- + */ + +int flac_decoder_reset(flac_decoder* decoder, uint32_t sample_rate, uint8_t num_channels, uint32_t block_size, const void *buffer, uint32_t length) +{ + /* modify the template header with our parameters */ + static const uint8_t s_header_template[0x2a] = + { + 0x66, 0x4C, 0x61, 0x43, /* +00: 'fLaC' stream header */ + 0x80, /* +04: metadata block type 0 (STREAMINFO), */ + /* flagged as last block */ + 0x00, 0x00, 0x22, /* +05: metadata block length = 0x22 */ + 0x00, 0x00, /* +08: minimum block size */ + 0x00, 0x00, /* +0A: maximum block size */ + 0x00, 0x00, 0x00, /* +0C: minimum frame size (0 == unknown) */ + 0x00, 0x00, 0x00, /* +0F: maximum frame size (0 == unknown) */ + 0x0A, 0xC4, 0x42, 0xF0, 0x00, 0x00, 0x00, 0x00, /* +12: sample rate (0x0ac44 == 44100), */ + /* numchannels (2), sample bits (16), */ + /* samples in stream (0 == unknown) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* +1A: MD5 signature (0 == none) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* +2A: start of stream data */ + }; + memcpy(decoder->custom_header, s_header_template, sizeof(s_header_template)); + decoder->custom_header[0x08] = decoder->custom_header[0x0a] = (block_size*num_channels) >> 8; + decoder->custom_header[0x09] = decoder->custom_header[0x0b] = (block_size*num_channels) & 0xff; + decoder->custom_header[0x12] = sample_rate >> 12; + decoder->custom_header[0x13] = sample_rate >> 4; + decoder->custom_header[0x14] = (sample_rate << 4) | ((num_channels - 1) << 1); + + /* configure the header ahead of the provided buffer */ + decoder->compressed_start = (const uint8_t *)(decoder->custom_header); + decoder->compressed_length = sizeof(decoder->custom_header); + decoder->compressed2_start = (const uint8_t *)(buffer); + decoder->compressed2_length = length; + return flac_decoder_internal_reset(decoder); +} + +/*------------------------------------------------- + * decode_interleaved - decode to an interleaved + * sound stream + *------------------------------------------------- + */ + +int flac_decoder_decode_interleaved(flac_decoder* decoder, int16_t *samples, uint32_t num_samples, int swap_endian) +{ + /* configure the uncompressed buffer */ + memset(decoder->uncompressed_start, 0, sizeof(decoder->uncompressed_start)); + decoder->uncompressed_start[0] = samples; + decoder->uncompressed_offset = 0; + decoder->uncompressed_length = num_samples; + decoder->uncompressed_swap = swap_endian; + +#define BUFFER 2352 /* bytes per CD audio sector */ + int16_t buffer[BUFFER]; + uint32_t buf_samples = BUFFER / channels(decoder); + /* loop until we get everything we want */ + while (decoder->uncompressed_offset < decoder->uncompressed_length) { + uint32_t frames = (num_samples < buf_samples ? num_samples : buf_samples); + if (!drflac_read_pcm_frames_s16(decoder->decoder, frames, buffer)) + return 0; + flac_decoder_write_callback(decoder, buffer, frames*sizeof(*buffer)*channels(decoder)); + num_samples -= frames; + } + return 1; +} + +/*------------------------------------------------- + * finish - finish up the decode + *------------------------------------------------- + */ + +uint32_t flac_decoder_finish(flac_decoder* decoder) +{ + /* get the final decoding position and move forward */ + drflac *flac = decoder->decoder; + uint64_t position = decoder->compressed_offset; + + /* ugh... there's no function to obtain bytes used in drflac :-/ */ + position -= DRFLAC_CACHE_L2_LINES_REMAINING(&flac->bs) * sizeof(drflac_cache_t); + position -= DRFLAC_CACHE_L1_BITS_REMAINING(&flac->bs) / 8; + position -= flac->bs.unalignedByteCount; + + /* adjust position if we provided the header */ + if (position == 0) + return 0; + if (decoder->compressed_start == (const uint8_t *)(decoder->custom_header)) + position -= decoder->compressed_length; + + flac_decoder_free(decoder); + return position; +} + +/*------------------------------------------------- + * read_callback - handle reads from the input + * stream + *------------------------------------------------- + */ + +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +static size_t flac_decoder_read_callback(void *userdata, void *buffer, size_t bytes) +{ + flac_decoder* decoder = (flac_decoder*)userdata; + uint8_t *dst = buffer; + + /* copy from primary buffer first */ + uint32_t outputpos = 0; + if (outputpos < bytes && decoder->compressed_offset < decoder->compressed_length) + { + uint32_t bytes_to_copy = MIN(bytes - outputpos, decoder->compressed_length - decoder->compressed_offset); + memcpy(&dst[outputpos], decoder->compressed_start + decoder->compressed_offset, bytes_to_copy); + outputpos += bytes_to_copy; + decoder->compressed_offset += bytes_to_copy; + } + + /* once we're out of that, copy from the secondary buffer */ + if (outputpos < bytes && decoder->compressed_offset < decoder->compressed_length + decoder->compressed2_length) + { + uint32_t bytes_to_copy = MIN(bytes - outputpos, decoder->compressed2_length - (decoder->compressed_offset - decoder->compressed_length)); + memcpy(&dst[outputpos], decoder->compressed2_start + decoder->compressed_offset - decoder->compressed_length, bytes_to_copy); + outputpos += bytes_to_copy; + decoder->compressed_offset += bytes_to_copy; + } + + return outputpos; +} + +/*------------------------------------------------- + * metadata_callback - handle STREAMINFO metadata + *------------------------------------------------- + */ + +static void flac_decoder_metadata_callback(void *userdata, drflac_metadata *metadata) +{ + flac_decoder *decoder = userdata; + + /* ignore all but STREAMINFO metadata */ + if (metadata->type != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO) + return; + + /* parse out the data we care about */ + decoder->sample_rate = metadata->data.streaminfo.sampleRate; + decoder->bits_per_sample = metadata->data.streaminfo.bitsPerSample; + decoder->channels = metadata->data.streaminfo.channels; +} + +/*------------------------------------------------- + * write_callback - handle writes to the output + * stream + *------------------------------------------------- + */ + +static void flac_decoder_write_callback(void *userdata, void *buffer, size_t bytes) +{ + int sampnum, chan; + int shift, blocksize; + flac_decoder * decoder = (flac_decoder *)userdata; + int16_t *sampbuf = (int16_t *)buffer; + int sampch = channels(decoder); + uint32_t offset = decoder->uncompressed_offset; + uint16_t usample; + + /* interleaved case */ + shift = decoder->uncompressed_swap ? 8 : 0; + blocksize = bytes / (sampch * sizeof(sampbuf[0])); + if (decoder->uncompressed_start[1] == NULL) + { + int16_t *dest = decoder->uncompressed_start[0] + offset * sampch; + for (sampnum = 0; sampnum < blocksize && offset < decoder->uncompressed_length; sampnum++, offset++) + for (chan = 0; chan < sampch; chan++) { + usample = (uint16_t)*sampbuf++; + *dest++ = (int16_t)((usample << shift) | (usample >> shift)); + } + } + + /* non-interleaved case */ + else + { + for (sampnum = 0; sampnum < blocksize && offset < decoder->uncompressed_length; sampnum++, offset++) + for (chan = 0; chan < sampch; chan++) { + usample = (uint16_t)*sampbuf++; + if (decoder->uncompressed_start[chan] != NULL) + decoder->uncompressed_start[chan][offset] = (int16_t) ((usample << shift) | (usample >> shift)); + } + } + decoder->uncompressed_offset = offset; +} + + +/*------------------------------------------------- + * seek_callback - handle seeks on the output + * stream + *------------------------------------------------- + */ + +static drflac_bool32 flac_decoder_seek_callback(void *userdata, int offset, drflac_seek_origin origin) +{ + flac_decoder * decoder = (flac_decoder *)userdata; + uint32_t length = decoder->compressed_length + decoder->compressed2_length; + + if (origin == drflac_seek_origin_start) { + uint32_t pos = offset; + if (pos <= length) { + decoder->compressed_offset = pos; + return 1; + } + } else if (origin == drflac_seek_origin_current) { + uint32_t pos = decoder->compressed_offset + offset; + if (pos <= length) { + decoder->compressed_offset = pos; + return 1; + } + } + return 0; +} + diff --git a/cores/saturn/deps/libchdr/src/libchdr_huffman.c b/cores/saturn/deps/libchdr/src/libchdr_huffman.c new file mode 100644 index 000000000..6a50f1344 --- /dev/null +++ b/cores/saturn/deps/libchdr/src/libchdr_huffman.c @@ -0,0 +1,544 @@ +/* license:BSD-3-Clause + * copyright-holders:Aaron Giles +**************************************************************************** + + huffman.c + + Static Huffman compression and decompression helpers. + +**************************************************************************** + + Maximum codelength is officially (alphabetsize - 1). This would be 255 bits + (since we use 1 byte values). However, it is also dependent upon the number + of samples used, as follows: + + 2 bits -> 3..4 samples + 3 bits -> 5..7 samples + 4 bits -> 8..12 samples + 5 bits -> 13..20 samples + 6 bits -> 21..33 samples + 7 bits -> 34..54 samples + 8 bits -> 55..88 samples + 9 bits -> 89..143 samples + 10 bits -> 144..232 samples + 11 bits -> 233..376 samples + 12 bits -> 377..609 samples + 13 bits -> 610..986 samples + 14 bits -> 987..1596 samples + 15 bits -> 1597..2583 samples + 16 bits -> 2584..4180 samples -> note that a 4k data size guarantees codelength <= 16 bits + 17 bits -> 4181..6764 samples + 18 bits -> 6765..10945 samples + 19 bits -> 10946..17710 samples + 20 bits -> 17711..28656 samples + 21 bits -> 28657..46367 samples + 22 bits -> 46368..75024 samples + 23 bits -> 75025..121392 samples + 24 bits -> 121393..196417 samples + 25 bits -> 196418..317810 samples + 26 bits -> 317811..514228 samples + 27 bits -> 514229..832039 samples + 28 bits -> 832040..1346268 samples + 29 bits -> 1346269..2178308 samples + 30 bits -> 2178309..3524577 samples + 31 bits -> 3524578..5702886 samples + 32 bits -> 5702887..9227464 samples + + Looking at it differently, here is where powers of 2 fall into these buckets: + + 256 samples -> 11 bits max + 512 samples -> 12 bits max + 1k samples -> 14 bits max + 2k samples -> 15 bits max + 4k samples -> 16 bits max + 8k samples -> 18 bits max + 16k samples -> 19 bits max + 32k samples -> 21 bits max + 64k samples -> 22 bits max + 128k samples -> 24 bits max + 256k samples -> 25 bits max + 512k samples -> 27 bits max + 1M samples -> 28 bits max + 2M samples -> 29 bits max + 4M samples -> 31 bits max + 8M samples -> 32 bits max + +**************************************************************************** + + Delta-RLE encoding works as follows: + + Starting value is assumed to be 0. All data is encoded as a delta + from the previous value, such that final[i] = final[i - 1] + delta. + Long runs of 0s are RLE-encoded as follows: + + 0x100 = repeat count of 8 + 0x101 = repeat count of 9 + 0x102 = repeat count of 10 + 0x103 = repeat count of 11 + 0x104 = repeat count of 12 + 0x105 = repeat count of 13 + 0x106 = repeat count of 14 + 0x107 = repeat count of 15 + 0x108 = repeat count of 16 + 0x109 = repeat count of 32 + 0x10a = repeat count of 64 + 0x10b = repeat count of 128 + 0x10c = repeat count of 256 + 0x10d = repeat count of 512 + 0x10e = repeat count of 1024 + 0x10f = repeat count of 2048 + + Note that repeat counts are reset at the end of a row, so if a 0 run + extends to the end of a row, a large repeat count may be used. + + The reason for starting the run counts at 8 is that 0 is expected to + be the most common symbol, and is typically encoded in 1 or 2 bits. + +***************************************************************************/ + +#include +#include +#include +#include + +#include + +#define MAX(x,y) ((x) > (y) ? (x) : (y)) + +/*************************************************************************** + * MACROS + *************************************************************************** + */ + +#define MAKE_LOOKUP(code,bits) (((code) << 5) | ((bits) & 0x1f)) + +/*************************************************************************** + * IMPLEMENTATION + *************************************************************************** + */ + +/*------------------------------------------------- + * huffman_context_base - create an encoding/ + * decoding context + *------------------------------------------------- + */ + +struct huffman_decoder* create_huffman_decoder(int numcodes, int maxbits) +{ + struct huffman_decoder* decoder = NULL; + + /* limit to 24 bits */ + if (maxbits > 24) + return NULL; + + decoder = (struct huffman_decoder*)malloc(sizeof(struct huffman_decoder)); + decoder->numcodes = numcodes; + decoder->maxbits = maxbits; + decoder->lookup = (lookup_value*)malloc(sizeof(lookup_value) * (1 << maxbits)); + decoder->huffnode = (struct node_t*)malloc(sizeof(struct node_t) * numcodes); + decoder->datahisto = NULL; + decoder->prevdata = 0; + decoder->rleremaining = 0; + return decoder; +} + +void delete_huffman_decoder(struct huffman_decoder* decoder) +{ + if (decoder != NULL) + { + if (decoder->lookup != NULL) + free(decoder->lookup); + if (decoder->huffnode != NULL) + free(decoder->huffnode); + free(decoder); + } +} + +/*------------------------------------------------- + * decode_one - decode a single code from the + * huffman stream + *------------------------------------------------- + */ + +uint32_t huffman_decode_one(struct huffman_decoder* decoder, struct bitstream* bitbuf) +{ + /* peek ahead to get maxbits worth of data */ + uint32_t bits = bitstream_peek(bitbuf, decoder->maxbits); + + /* look it up, then remove the actual number of bits for this code */ + lookup_value lookup = decoder->lookup[bits]; + bitstream_remove(bitbuf, lookup & 0x1f); + + /* return the value */ + return lookup >> 5; +} + +/*------------------------------------------------- + * import_tree_rle - import an RLE-encoded + * huffman tree from a source data stream + *------------------------------------------------- + */ + +enum huffman_error huffman_import_tree_rle(struct huffman_decoder* decoder, struct bitstream* bitbuf) +{ + int numbits, curnode; + enum huffman_error error; + + /* bits per entry depends on the maxbits */ + if (decoder->maxbits >= 16) + numbits = 5; + else if (decoder->maxbits >= 8) + numbits = 4; + else + numbits = 3; + + /* loop until we read all the nodes */ + for (curnode = 0; curnode < decoder->numcodes; ) + { + /* a non-one value is just raw */ + int nodebits = bitstream_read(bitbuf, numbits); + if (nodebits != 1) + decoder->huffnode[curnode++].numbits = nodebits; + + /* a one value is an escape code */ + else + { + /* a double 1 is just a single 1 */ + nodebits = bitstream_read(bitbuf, numbits); + if (nodebits == 1) + decoder->huffnode[curnode++].numbits = nodebits; + + /* otherwise, we need one for value for the repeat count */ + else + { + int repcount = bitstream_read(bitbuf, numbits) + 3; + while (repcount--) + decoder->huffnode[curnode++].numbits = nodebits; + } + } + } + + /* make sure we ended up with the right number */ + if (curnode != decoder->numcodes) + return HUFFERR_INVALID_DATA; + + /* assign canonical codes for all nodes based on their code lengths */ + error = huffman_assign_canonical_codes(decoder); + if (error != HUFFERR_NONE) + return error; + + /* build the lookup table */ + huffman_build_lookup_table(decoder); + + /* determine final input length and report errors */ + return bitstream_overflow(bitbuf) ? HUFFERR_INPUT_BUFFER_TOO_SMALL : HUFFERR_NONE; +} + + +/*------------------------------------------------- + * import_tree_huffman - import a huffman-encoded + * huffman tree from a source data stream + *------------------------------------------------- + */ + +enum huffman_error huffman_import_tree_huffman(struct huffman_decoder* decoder, struct bitstream* bitbuf) +{ + int start; + int last = 0; + int count = 0; + int index; + int curcode; + uint8_t rlefullbits = 0; + uint32_t temp; + enum huffman_error error; + /* start by parsing the lengths for the small tree */ + struct huffman_decoder* smallhuff = create_huffman_decoder(24, 6); + smallhuff->huffnode[0].numbits = bitstream_read(bitbuf, 3); + start = bitstream_read(bitbuf, 3) + 1; + for (index = 1; index < 24; index++) + { + if (index < start || count == 7) + smallhuff->huffnode[index].numbits = 0; + else + { + count = bitstream_read(bitbuf, 3); + smallhuff->huffnode[index].numbits = (count == 7) ? 0 : count; + } + } + + /* then regenerate the tree */ + error = huffman_assign_canonical_codes(smallhuff); + if (error != HUFFERR_NONE) + return error; + huffman_build_lookup_table(smallhuff); + + /* determine the maximum length of an RLE count */ + temp = decoder->numcodes - 9; + while (temp != 0) + temp >>= 1, rlefullbits++; + + /* now process the rest of the data */ + for (curcode = 0; curcode < decoder->numcodes; ) + { + int value = huffman_decode_one(smallhuff, bitbuf); + if (value != 0) + decoder->huffnode[curcode++].numbits = last = value - 1; + else + { + int count = bitstream_read(bitbuf, 3) + 2; + if (count == 7+2) + count += bitstream_read(bitbuf, rlefullbits); + for ( ; count != 0 && curcode < decoder->numcodes; count--) + decoder->huffnode[curcode++].numbits = last; + } + } + + /* make sure we ended up with the right number */ + if (curcode != decoder->numcodes) + return HUFFERR_INVALID_DATA; + + /* assign canonical codes for all nodes based on their code lengths */ + error = huffman_assign_canonical_codes(decoder); + if (error != HUFFERR_NONE) + return error; + + /* build the lookup table */ + huffman_build_lookup_table(decoder); + + /* determine final input length and report errors */ + return bitstream_overflow(bitbuf) ? HUFFERR_INPUT_BUFFER_TOO_SMALL : HUFFERR_NONE; +} + +/*------------------------------------------------- + * compute_tree_from_histo - common backend for + * computing a tree based on the data histogram + *------------------------------------------------- + */ + +enum huffman_error huffman_compute_tree_from_histo(struct huffman_decoder* decoder) +{ + int i; + uint32_t lowerweight; + uint32_t upperweight; + /* compute the number of data items in the histogram */ + uint32_t sdatacount = 0; + for (i = 0; i < decoder->numcodes; i++) + sdatacount += decoder->datahisto[i]; + + /* binary search to achieve the optimum encoding */ + lowerweight = 0; + upperweight = sdatacount * 2; + while (1) + { + /* build a tree using the current weight */ + uint32_t curweight = (upperweight + lowerweight) / 2; + int curmaxbits = huffman_build_tree(decoder, sdatacount, curweight); + + /* apply binary search here */ + if (curmaxbits <= decoder->maxbits) + { + lowerweight = curweight; + + /* early out if it worked with the raw weights, or if we're done searching */ + if (curweight == sdatacount || (upperweight - lowerweight) <= 1) + break; + } + else + upperweight = curweight; + } + + /* assign canonical codes for all nodes based on their code lengths */ + return huffman_assign_canonical_codes(decoder); +} + +/*************************************************************************** + * INTERNAL FUNCTIONS + *************************************************************************** + */ + +/*------------------------------------------------- + * tree_node_compare - compare two tree nodes + * by weight + *------------------------------------------------- + */ + +static int huffman_tree_node_compare(const void *item1, const void *item2) +{ + const struct node_t *node1 = *(const struct node_t **)item1; + const struct node_t *node2 = *(const struct node_t **)item2; + if (node2->weight != node1->weight) + return node2->weight - node1->weight; + if (node2->bits - node1->bits == 0) + fprintf(stderr, "identical node sort keys, should not happen!\n"); + return (int)node1->bits - (int)node2->bits; +} + +/*------------------------------------------------- + * build_tree - build a huffman tree based on the + * data distribution + *------------------------------------------------- + */ + +int huffman_build_tree(struct huffman_decoder* decoder, uint32_t totaldata, uint32_t totalweight) +{ + int curcode; + int nextalloc; + int listitems = 0; + int maxbits = 0; + /* make a list of all non-zero nodes */ + struct node_t** list = (struct node_t**)malloc(sizeof(struct node_t*) * decoder->numcodes * 2); + memset(decoder->huffnode, 0, decoder->numcodes * sizeof(decoder->huffnode[0])); + for (curcode = 0; curcode < decoder->numcodes; curcode++) + if (decoder->datahisto[curcode] != 0) + { + list[listitems++] = &decoder->huffnode[curcode]; + decoder->huffnode[curcode].count = decoder->datahisto[curcode]; + decoder->huffnode[curcode].bits = curcode; + + /* scale the weight by the current effective length, ensuring we don't go to 0 */ + decoder->huffnode[curcode].weight = ((uint64_t)decoder->datahisto[curcode]) * ((uint64_t)totalweight) / ((uint64_t)totaldata); + if (decoder->huffnode[curcode].weight == 0) + decoder->huffnode[curcode].weight = 1; + } + +#if 0 + fprintf(stderr, "Pre-sort:\n"); + for (int i = 0; i < listitems; i++) { + fprintf(stderr, "weight: %d code: %d\n", list[i]->m_weight, list[i]->m_bits); + } +#endif + + /* sort the list by weight, largest weight first */ + qsort(&list[0], listitems, sizeof(list[0]), huffman_tree_node_compare); + +#if 0 + fprintf(stderr, "Post-sort:\n"); + for (int i = 0; i < listitems; i++) { + fprintf(stderr, "weight: %d code: %d\n", list[i]->m_weight, list[i]->m_bits); + } + fprintf(stderr, "===================\n"); +#endif + + /* now build the tree */ + nextalloc = decoder->numcodes; + while (listitems > 1) + { + int curitem; + /* remove lowest two items */ + struct node_t* node1 = &(*list[--listitems]); + struct node_t* node0 = &(*list[--listitems]); + + /* create new node */ + struct node_t* newnode = &decoder->huffnode[nextalloc++]; + newnode->parent = NULL; + node0->parent = node1->parent = newnode; + newnode->weight = node0->weight + node1->weight; + + /* insert into list at appropriate location */ + for (curitem = 0; curitem < listitems; curitem++) + if (newnode->weight > list[curitem]->weight) + { + memmove(&list[curitem+1], &list[curitem], (listitems - curitem) * sizeof(list[0])); + break; + } + list[curitem] = newnode; + listitems++; + } + + /* compute the number of bits in each code, and fill in another histogram */ + for (curcode = 0; curcode < decoder->numcodes; curcode++) + { + struct node_t *curnode; + struct node_t* node = &decoder->huffnode[curcode]; + node->numbits = 0; + node->bits = 0; + + /* if we have a non-zero weight, compute the number of bits */ + if (node->weight > 0) + { + /* determine the number of bits for this node */ + for (curnode = node; curnode->parent != NULL; curnode = curnode->parent) + node->numbits++; + if (node->numbits == 0) + node->numbits = 1; + + /* keep track of the max */ + maxbits = MAX(maxbits, ((int)node->numbits)); + } + } + return maxbits; +} + +/*------------------------------------------------- + * assign_canonical_codes - assign canonical codes + * to all the nodes based on the number of bits + * in each + *------------------------------------------------- + */ + +enum huffman_error huffman_assign_canonical_codes(struct huffman_decoder* decoder) +{ + int curcode, codelen; + uint32_t curstart = 0; + /* build up a histogram of bit lengths */ + uint32_t bithisto[33] = { 0 }; + for (curcode = 0; curcode < decoder->numcodes; curcode++) + { + struct node_t* node = &decoder->huffnode[curcode]; + if (node->numbits > decoder->maxbits) + return HUFFERR_INTERNAL_INCONSISTENCY; + if (node->numbits <= 32) + bithisto[node->numbits]++; + } + + /* for each code length, determine the starting code number */ + for (codelen = 32; codelen > 0; codelen--) + { + uint32_t nextstart = (curstart + bithisto[codelen]) >> 1; + if (codelen != 1 && nextstart * 2 != (curstart + bithisto[codelen])) + return HUFFERR_INTERNAL_INCONSISTENCY; + bithisto[codelen] = curstart; + curstart = nextstart; + } + + /* now assign canonical codes */ + for (curcode = 0; curcode < decoder->numcodes; curcode++) + { + struct node_t* node = &decoder->huffnode[curcode]; + if (node->numbits > 0) + node->bits = bithisto[node->numbits]++; + } + return HUFFERR_NONE; +} + +/*------------------------------------------------- + * build_lookup_table - build a lookup table for + * fast decoding + *------------------------------------------------- + */ + +void huffman_build_lookup_table(struct huffman_decoder* decoder) +{ + int curcode; + /* iterate over all codes */ + for (curcode = 0; curcode < decoder->numcodes; curcode++) + { + /* process all nodes which have non-zero bits */ + struct node_t* node = &decoder->huffnode[curcode]; + if (node->numbits > 0) + { + int shift; + lookup_value *dest; + lookup_value *destend; + /* set up the entry */ + lookup_value value = MAKE_LOOKUP(curcode, node->numbits); + + /* fill all matching entries */ + shift = decoder->maxbits - node->numbits; + dest = &decoder->lookup[node->bits << shift]; + destend = &decoder->lookup[((node->bits + 1) << shift) - 1]; + while (dest <= destend) + *dest++ = value; + } + } +} diff --git a/cores/saturn/deps/libchdr/src/link.T b/cores/saturn/deps/libchdr/src/link.T new file mode 100644 index 000000000..ea37716b7 --- /dev/null +++ b/cores/saturn/deps/libchdr/src/link.T @@ -0,0 +1,5 @@ +{ + global: chd_*; + local: *; +}; + diff --git a/cores/saturn/deps/lzma/CMakeLists.txt b/cores/saturn/deps/lzma/CMakeLists.txt new file mode 100644 index 000000000..fb01f60f5 --- /dev/null +++ b/cores/saturn/deps/lzma/CMakeLists.txt @@ -0,0 +1,32 @@ +add_library(lzma STATIC + include/7zTypes.h + include/Alloc.h + include/Bra.h + include/Compiler.h + include/CpuArch.h + include/Delta.h + include/LzFind.h + include/LzHash.h + include/Lzma86.h + include/LzmaDec.h + include/LzmaEnc.h + include/LzmaLib.h + include/Precomp.h + include/Sort.h + src/Alloc.c + src/Bra86.c + src/BraIA64.c + src/CpuArch.c + src/Delta.c + src/LzFind.c + src/Lzma86Dec.c + src/LzmaDec.c + src/LzmaEnc.c + src/Sort.c +) + +target_compile_definitions(lzma PRIVATE _7ZIP_ST) + +target_include_directories(lzma PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include") +target_include_directories(lzma INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include") + diff --git a/cores/saturn/deps/lzma/LICENSE b/cores/saturn/deps/lzma/LICENSE new file mode 100644 index 000000000..5f5705169 --- /dev/null +++ b/cores/saturn/deps/lzma/LICENSE @@ -0,0 +1,3 @@ +LZMA SDK is placed in the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or distribute the original LZMA SDK code, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. \ No newline at end of file diff --git a/cores/saturn/deps/lzma/include/7zTypes.h b/cores/saturn/deps/lzma/include/7zTypes.h new file mode 100644 index 000000000..65b3af63c --- /dev/null +++ b/cores/saturn/deps/lzma/include/7zTypes.h @@ -0,0 +1,375 @@ +/* 7zTypes.h -- Basic types +2018-08-04 : Igor Pavlov : Public domain */ + +#ifndef __7Z_TYPES_H +#define __7Z_TYPES_H + +#ifdef _WIN32 +/* #include */ +#endif + +#include + +#ifndef EXTERN_C_BEGIN +#ifdef __cplusplus +#define EXTERN_C_BEGIN extern "C" { +#define EXTERN_C_END } +#else +#define EXTERN_C_BEGIN +#define EXTERN_C_END +#endif +#endif + +EXTERN_C_BEGIN + +#define SZ_OK 0 + +#define SZ_ERROR_DATA 1 +#define SZ_ERROR_MEM 2 +#define SZ_ERROR_CRC 3 +#define SZ_ERROR_UNSUPPORTED 4 +#define SZ_ERROR_PARAM 5 +#define SZ_ERROR_INPUT_EOF 6 +#define SZ_ERROR_OUTPUT_EOF 7 +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 +#define SZ_ERROR_PROGRESS 10 +#define SZ_ERROR_FAIL 11 +#define SZ_ERROR_THREAD 12 + +#define SZ_ERROR_ARCHIVE 16 +#define SZ_ERROR_NO_ARCHIVE 17 + +typedef int SRes; + + +#ifdef _WIN32 + +/* typedef DWORD WRes; */ +typedef unsigned WRes; +#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) + +#else + +typedef int WRes; +#define MY__FACILITY_WIN32 7 +#define MY__FACILITY__WRes MY__FACILITY_WIN32 +#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000))) + +#endif + + +#ifndef RINOK +#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } +#endif + +typedef unsigned char Byte; +typedef short Int16; +typedef unsigned short UInt16; + +#ifdef _LZMA_UINT32_IS_ULONG +typedef long Int32; +typedef unsigned long UInt32; +#else +typedef int Int32; +typedef unsigned int UInt32; +#endif + +#ifdef _SZ_NO_INT_64 + +/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. + NOTES: Some code will work incorrectly in that case! */ + +typedef long Int64; +typedef unsigned long UInt64; + +#else + +#if defined(_MSC_VER) || defined(__BORLANDC__) +typedef __int64 Int64; +typedef unsigned __int64 UInt64; +#define UINT64_CONST(n) n +#else +typedef long long int Int64; +typedef unsigned long long int UInt64; +#define UINT64_CONST(n) n ## ULL +#endif + +#endif + +#ifdef _LZMA_NO_SYSTEM_SIZE_T +typedef UInt32 SizeT; +#else +typedef size_t SizeT; +#endif + +typedef int BoolInt; +/* typedef BoolInt Bool; */ +#define True 1 +#define False 0 + + +#ifdef _WIN32 +#define MY_STD_CALL __stdcall +#else +#define MY_STD_CALL +#endif + +#ifdef _MSC_VER + +#if _MSC_VER >= 1300 +#define MY_NO_INLINE __declspec(noinline) +#else +#define MY_NO_INLINE +#endif + +#define MY_FORCE_INLINE __forceinline + +#define MY_CDECL __cdecl +#define MY_FAST_CALL __fastcall + +#else + +#define MY_NO_INLINE +#define MY_FORCE_INLINE +#define MY_CDECL +#define MY_FAST_CALL + +/* inline keyword : for C++ / C99 */ + +/* GCC, clang: */ +/* +#if defined (__GNUC__) && (__GNUC__ >= 4) +#define MY_FORCE_INLINE __attribute__((always_inline)) +#define MY_NO_INLINE __attribute__((noinline)) +#endif +*/ + +#endif + + +/* The following interfaces use first parameter as pointer to structure */ + +typedef struct IByteIn IByteIn; +struct IByteIn +{ + Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ +}; +#define IByteIn_Read(p) (p)->Read(p) + + +typedef struct IByteOut IByteOut; +struct IByteOut +{ + void (*Write)(const IByteOut *p, Byte b); +}; +#define IByteOut_Write(p, b) (p)->Write(p, b) + + +typedef struct ISeqInStream ISeqInStream; +struct ISeqInStream +{ + SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) < input(*size)) is allowed */ +}; +#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) + +/* it can return SZ_ERROR_INPUT_EOF */ +SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); +SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); + + +typedef struct ISeqOutStream ISeqOutStream; +struct ISeqOutStream +{ + size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); + /* Returns: result - the number of actually written bytes. + (result < size) means error */ +}; +#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size) + +typedef enum +{ + SZ_SEEK_SET = 0, + SZ_SEEK_CUR = 1, + SZ_SEEK_END = 2 +} ESzSeek; + + +typedef struct ISeekInStream ISeekInStream; +struct ISeekInStream +{ + SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); +}; +#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +typedef struct ILookInStream ILookInStream; +struct ILookInStream +{ + SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) > input(*size)) is not allowed + (output(*size) < input(*size)) is allowed */ + SRes (*Skip)(const ILookInStream *p, size_t offset); + /* offset must be <= output(*size) of Look */ + + SRes (*Read)(const ILookInStream *p, void *buf, size_t *size); + /* reads directly (without buffer). It's same as ISeqInStream::Read */ + SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); +}; + +#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) +#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset) +#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); + +/* reads via ILookInStream::Read */ +SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); + + + +typedef struct +{ + ILookInStream vt; + const ISeekInStream *realStream; + + size_t pos; + size_t size; /* it's data size */ + + /* the following variables must be set outside */ + Byte *buf; + size_t bufSize; +} CLookToRead2; + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); + +#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } + + +typedef struct +{ + ISeqInStream vt; + const ILookInStream *realStream; +} CSecToLook; + +void SecToLook_CreateVTable(CSecToLook *p); + + + +typedef struct +{ + ISeqInStream vt; + const ILookInStream *realStream; +} CSecToRead; + +void SecToRead_CreateVTable(CSecToRead *p); + + +typedef struct ICompressProgress ICompressProgress; + +struct ICompressProgress +{ + SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); + /* Returns: result. (result != SZ_OK) means break. + Value (UInt64)(Int64)-1 for size means unknown value. */ +}; +#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) + + + +typedef struct ISzAlloc ISzAlloc; +typedef const ISzAlloc * ISzAllocPtr; + +struct ISzAlloc +{ + void *(*Alloc)(ISzAllocPtr p, size_t size); + void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */ +}; + +#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size) +#define ISzAlloc_Free(p, a) (p)->Free(p, a) + +/* deprecated */ +#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size) +#define IAlloc_Free(p, a) ISzAlloc_Free(p, a) + + + + + +#ifndef MY_offsetof + #ifdef offsetof + #define MY_offsetof(type, m) offsetof(type, m) + /* + #define MY_offsetof(type, m) FIELD_OFFSET(type, m) + */ + #else + #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m)) + #endif +#endif + + + +#ifndef MY_container_of + +/* +#define MY_container_of(ptr, type, m) container_of(ptr, type, m) +#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) +#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) +#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) +*/ + +/* + GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly" + GCC 3.4.4 : classes with constructor + GCC 4.8.1 : classes with non-public variable members" +*/ + +#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) + + +#endif + +#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr)) + +/* +#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +*/ +#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) + +#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +/* +#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) +*/ + + + +#ifdef _WIN32 + +#define CHAR_PATH_SEPARATOR '\\' +#define WCHAR_PATH_SEPARATOR L'\\' +#define STRING_PATH_SEPARATOR "\\" +#define WSTRING_PATH_SEPARATOR L"\\" + +#else + +#define CHAR_PATH_SEPARATOR '/' +#define WCHAR_PATH_SEPARATOR L'/' +#define STRING_PATH_SEPARATOR "/" +#define WSTRING_PATH_SEPARATOR L"/" + +#endif + +EXTERN_C_END + +#endif diff --git a/cores/saturn/deps/lzma/include/Alloc.h b/cores/saturn/deps/lzma/include/Alloc.h new file mode 100644 index 000000000..648237646 --- /dev/null +++ b/cores/saturn/deps/lzma/include/Alloc.h @@ -0,0 +1,51 @@ +/* Alloc.h -- Memory allocation functions +2018-02-19 : Igor Pavlov : Public domain */ + +#ifndef __COMMON_ALLOC_H +#define __COMMON_ALLOC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +void *MyAlloc(size_t size); +void MyFree(void *address); + +#ifdef _WIN32 + +void SetLargePageSize(); + +void *MidAlloc(size_t size); +void MidFree(void *address); +void *BigAlloc(size_t size); +void BigFree(void *address); + +#else + +#define MidAlloc(size) MyAlloc(size) +#define MidFree(address) MyFree(address) +#define BigAlloc(size) MyAlloc(size) +#define BigFree(address) MyFree(address) + +#endif + +extern const ISzAlloc g_Alloc; +extern const ISzAlloc g_BigAlloc; +extern const ISzAlloc g_MidAlloc; +extern const ISzAlloc g_AlignedAlloc; + + +typedef struct +{ + ISzAlloc vt; + ISzAllocPtr baseAlloc; + unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */ + size_t offset; /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */ +} CAlignOffsetAlloc; + +void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p); + + +EXTERN_C_END + +#endif diff --git a/cores/saturn/deps/lzma/include/Bra.h b/cores/saturn/deps/lzma/include/Bra.h new file mode 100644 index 000000000..855e37a6b --- /dev/null +++ b/cores/saturn/deps/lzma/include/Bra.h @@ -0,0 +1,64 @@ +/* Bra.h -- Branch converters for executables +2013-01-18 : Igor Pavlov : Public domain */ + +#ifndef __BRA_H +#define __BRA_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* +These functions convert relative addresses to absolute addresses +in CALL instructions to increase the compression ratio. + + In: + data - data buffer + size - size of data + ip - current virtual Instruction Pinter (IP) value + state - state variable for x86 converter + encoding - 0 (for decoding), 1 (for encoding) + + Out: + state - state variable for x86 converter + + Returns: + The number of processed bytes. If you call these functions with multiple calls, + you must start next call with first byte after block of processed bytes. + + Type Endian Alignment LookAhead + + x86 little 1 4 + ARMT little 2 2 + ARM little 4 0 + PPC big 4 0 + SPARC big 4 0 + IA64 little 16 0 + + size must be >= Alignment + LookAhead, if it's not last block. + If (size < Alignment + LookAhead), converter returns 0. + + Example: + + UInt32 ip = 0; + for () + { + ; size must be >= Alignment + LookAhead, if it's not last block + SizeT processed = Convert(data, size, ip, 1); + data += processed; + size -= processed; + ip += processed; + } +*/ + +#define x86_Convert_Init(state) { state = 0; } +SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding); +SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); + +EXTERN_C_END + +#endif diff --git a/cores/saturn/deps/lzma/include/Compiler.h b/cores/saturn/deps/lzma/include/Compiler.h new file mode 100644 index 000000000..0cc409d8a --- /dev/null +++ b/cores/saturn/deps/lzma/include/Compiler.h @@ -0,0 +1,33 @@ +/* Compiler.h +2017-04-03 : Igor Pavlov : Public domain */ + +#ifndef __7Z_COMPILER_H +#define __7Z_COMPILER_H + +#ifdef _MSC_VER + + #ifdef UNDER_CE + #define RPC_NO_WINDOWS_H + /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */ + #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union + #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int + #endif + + #if _MSC_VER >= 1300 + #pragma warning(disable : 4996) // This function or variable may be unsafe + #else + #pragma warning(disable : 4511) // copy constructor could not be generated + #pragma warning(disable : 4512) // assignment operator could not be generated + #pragma warning(disable : 4514) // unreferenced inline function has been removed + #pragma warning(disable : 4702) // unreachable code + #pragma warning(disable : 4710) // not inlined + #pragma warning(disable : 4714) // function marked as __forceinline not inlined + #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information + #endif + +#endif + +#define UNUSED_VAR(x) (void)x; +/* #define UNUSED_VAR(x) x=x; */ + +#endif diff --git a/cores/saturn/deps/lzma/include/CpuArch.h b/cores/saturn/deps/lzma/include/CpuArch.h new file mode 100644 index 000000000..bd4293880 --- /dev/null +++ b/cores/saturn/deps/lzma/include/CpuArch.h @@ -0,0 +1,336 @@ +/* CpuArch.h -- CPU specific code +2018-02-18 : Igor Pavlov : Public domain */ + +#ifndef __CPU_ARCH_H +#define __CPU_ARCH_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* +MY_CPU_LE means that CPU is LITTLE ENDIAN. +MY_CPU_BE means that CPU is BIG ENDIAN. +If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform. + +MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses. +*/ + +#if defined(_M_X64) \ + || defined(_M_AMD64) \ + || defined(__x86_64__) \ + || defined(__AMD64__) \ + || defined(__amd64__) + #define MY_CPU_AMD64 + #ifdef __ILP32__ + #define MY_CPU_NAME "x32" + #else + #define MY_CPU_NAME "x64" + #endif + #define MY_CPU_64BIT +#endif + + +#if defined(_M_IX86) \ + || defined(__i386__) + #define MY_CPU_X86 + #define MY_CPU_NAME "x86" + #define MY_CPU_32BIT +#endif + + +#if defined(_M_ARM64) \ + || defined(__AARCH64EL__) \ + || defined(__AARCH64EB__) \ + || defined(__aarch64__) + #define MY_CPU_ARM64 + #define MY_CPU_NAME "arm64" + #define MY_CPU_64BIT +#endif + + +#if defined(_M_ARM) \ + || defined(_M_ARM_NT) \ + || defined(_M_ARMT) \ + || defined(__arm__) \ + || defined(__thumb__) \ + || defined(__ARMEL__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEL__) \ + || defined(__THUMBEB__) + #define MY_CPU_ARM + #define MY_CPU_NAME "arm" + #define MY_CPU_32BIT +#endif + + +#if defined(_M_IA64) \ + || defined(__ia64__) + #define MY_CPU_IA64 + #define MY_CPU_NAME "ia64" + #define MY_CPU_64BIT +#endif + + +#if defined(__mips64) \ + || defined(__mips64__) \ + || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3)) + #define MY_CPU_NAME "mips64" + #define MY_CPU_64BIT +#elif defined(__mips__) + #define MY_CPU_NAME "mips" + /* #define MY_CPU_32BIT */ +#endif + + +#if defined(__ppc64__) \ + || defined(__powerpc64__) + #ifdef __ILP32__ + #define MY_CPU_NAME "ppc64-32" + #else + #define MY_CPU_NAME "ppc64" + #endif + #define MY_CPU_64BIT +#elif defined(__ppc__) \ + || defined(__powerpc__) + #define MY_CPU_NAME "ppc" + #define MY_CPU_32BIT +#endif + + +#if defined(__sparc64__) + #define MY_CPU_NAME "sparc64" + #define MY_CPU_64BIT +#elif defined(__sparc__) + #define MY_CPU_NAME "sparc" + /* #define MY_CPU_32BIT */ +#endif + + +#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) +#define MY_CPU_X86_OR_AMD64 +#endif + + +#ifdef _WIN32 + + #ifdef MY_CPU_ARM + #define MY_CPU_ARM_LE + #endif + + #ifdef MY_CPU_ARM64 + #define MY_CPU_ARM64_LE + #endif + + #ifdef _M_IA64 + #define MY_CPU_IA64_LE + #endif + +#endif + + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_LE) \ + || defined(MY_CPU_ARM64_LE) \ + || defined(MY_CPU_IA64_LE) \ + || defined(__LITTLE_ENDIAN__) \ + || defined(__ARMEL__) \ + || defined(__THUMBEL__) \ + || defined(__AARCH64EL__) \ + || defined(__MIPSEL__) \ + || defined(__MIPSEL) \ + || defined(_MIPSEL) \ + || defined(__BFIN__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) + #define MY_CPU_LE +#endif + +#if defined(__BIG_ENDIAN__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEB__) \ + || defined(__AARCH64EB__) \ + || defined(__MIPSEB__) \ + || defined(__MIPSEB) \ + || defined(_MIPSEB) \ + || defined(__m68k__) \ + || defined(__s390__) \ + || defined(__s390x__) \ + || defined(__zarch__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + #define MY_CPU_BE +#endif + + +#if defined(MY_CPU_LE) && defined(MY_CPU_BE) + #error Stop_Compiling_Bad_Endian +#endif + + +#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) + #error Stop_Compiling_Bad_32_64_BIT +#endif + + +#ifndef MY_CPU_NAME + #ifdef MY_CPU_LE + #define MY_CPU_NAME "LE" + #elif defined(MY_CPU_BE) + #define MY_CPU_NAME "BE" + #else + /* + #define MY_CPU_NAME "" + */ + #endif +#endif + + + + + +#ifdef MY_CPU_LE + #if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM64) \ + || defined(__ARM_FEATURE_UNALIGNED) + #define MY_CPU_LE_UNALIGN + #endif +#endif + + +#ifdef MY_CPU_LE_UNALIGN + +#define GetUi16(p) (*(const UInt16 *)(const void *)(p)) +#define GetUi32(p) (*(const UInt32 *)(const void *)(p)) +#define GetUi64(p) (*(const UInt64 *)(const void *)(p)) + +#define SetUi16(p, v) { *(UInt16 *)(p) = (v); } +#define SetUi32(p, v) { *(UInt32 *)(p) = (v); } +#define SetUi64(p, v) { *(UInt64 *)(p) = (v); } + +#else + +#define GetUi16(p) ( (UInt16) ( \ + ((const Byte *)(p))[0] | \ + ((UInt16)((const Byte *)(p))[1] << 8) )) + +#define GetUi32(p) ( \ + ((const Byte *)(p))[0] | \ + ((UInt32)((const Byte *)(p))[1] << 8) | \ + ((UInt32)((const Byte *)(p))[2] << 16) | \ + ((UInt32)((const Byte *)(p))[3] << 24)) + +#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) + +#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); } + +#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); \ + _ppp_[2] = (Byte)(_vvv_ >> 16); \ + _ppp_[3] = (Byte)(_vvv_ >> 24); } + +#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ + SetUi32(_ppp2_ , (UInt32)_vvv2_); \ + SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); } + +#endif + +#ifdef __has_builtin + #define MY__has_builtin(x) __has_builtin(x) +#else + #define MY__has_builtin(x) 0 +#endif + +#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300) + +/* Note: we use bswap instruction, that is unsupported in 386 cpu */ + +#include + +#pragma intrinsic(_byteswap_ushort) +#pragma intrinsic(_byteswap_ulong) +#pragma intrinsic(_byteswap_uint64) + +/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */ +#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p)) +#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p)) + +#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v) + +#elif defined(MY_CPU_LE_UNALIGN) && ( \ + (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) ) + +/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */ +#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p)) +#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p)) + +#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v) + +#else + +#define GetBe32(p) ( \ + ((UInt32)((const Byte *)(p))[0] << 24) | \ + ((UInt32)((const Byte *)(p))[1] << 16) | \ + ((UInt32)((const Byte *)(p))[2] << 8) | \ + ((const Byte *)(p))[3] ) + +#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) + +#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)(_vvv_ >> 24); \ + _ppp_[1] = (Byte)(_vvv_ >> 16); \ + _ppp_[2] = (Byte)(_vvv_ >> 8); \ + _ppp_[3] = (Byte)_vvv_; } + +#endif + + +#ifndef GetBe16 + +#define GetBe16(p) ( (UInt16) ( \ + ((UInt16)((const Byte *)(p))[0] << 8) | \ + ((const Byte *)(p))[1] )) + +#endif + + + +#ifdef MY_CPU_X86_OR_AMD64 + +typedef struct +{ + UInt32 maxFunc; + UInt32 vendor[3]; + UInt32 ver; + UInt32 b; + UInt32 c; + UInt32 d; +} Cx86cpuid; + +enum +{ + CPU_FIRM_INTEL, + CPU_FIRM_AMD, + CPU_FIRM_VIA +}; + +void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d); + +BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p); +int x86cpuid_GetFirm(const Cx86cpuid *p); + +#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF)) +#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF)) +#define x86cpuid_GetStepping(ver) (ver & 0xF) + +BoolInt CPU_Is_InOrder(); +BoolInt CPU_Is_Aes_Supported(); +BoolInt CPU_IsSupported_PageGB(); + +#endif + +EXTERN_C_END + +#endif diff --git a/cores/saturn/deps/lzma/include/Delta.h b/cores/saturn/deps/lzma/include/Delta.h new file mode 100644 index 000000000..2fa54ad67 --- /dev/null +++ b/cores/saturn/deps/lzma/include/Delta.h @@ -0,0 +1,19 @@ +/* Delta.h -- Delta converter +2013-01-18 : Igor Pavlov : Public domain */ + +#ifndef __DELTA_H +#define __DELTA_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define DELTA_STATE_SIZE 256 + +void Delta_Init(Byte *state); +void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size); +void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size); + +EXTERN_C_END + +#endif diff --git a/cores/saturn/deps/lzma/include/LzFind.h b/cores/saturn/deps/lzma/include/LzFind.h new file mode 100644 index 000000000..42c13be15 --- /dev/null +++ b/cores/saturn/deps/lzma/include/LzFind.h @@ -0,0 +1,121 @@ +/* LzFind.h -- Match finder for LZ algorithms +2017-06-10 : Igor Pavlov : Public domain */ + +#ifndef __LZ_FIND_H +#define __LZ_FIND_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +typedef UInt32 CLzRef; + +typedef struct _CMatchFinder +{ + Byte *buffer; + UInt32 pos; + UInt32 posLimit; + UInt32 streamPos; + UInt32 lenLimit; + + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ + + Byte streamEndWasReached; + Byte btMode; + Byte bigHash; + Byte directInput; + + UInt32 matchMaxLen; + CLzRef *hash; + CLzRef *son; + UInt32 hashMask; + UInt32 cutValue; + + Byte *bufferBase; + ISeqInStream *stream; + + UInt32 blockSize; + UInt32 keepSizeBefore; + UInt32 keepSizeAfter; + + UInt32 numHashBytes; + size_t directInputRem; + UInt32 historySize; + UInt32 fixedHashSize; + UInt32 hashSizeSum; + SRes result; + UInt32 crc[256]; + size_t numRefs; + + UInt64 expectedDataSize; +} CMatchFinder; + +#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) + +#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) + +#define Inline_MatchFinder_IsFinishedOK(p) \ + ((p)->streamEndWasReached \ + && (p)->streamPos == (p)->pos \ + && (!(p)->directInput || (p)->directInputRem == 0)) + +int MatchFinder_NeedMove(CMatchFinder *p); +Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); +void MatchFinder_MoveBlock(CMatchFinder *p); +void MatchFinder_ReadIfRequired(CMatchFinder *p); + +void MatchFinder_Construct(CMatchFinder *p); + +/* Conditions: + historySize <= 3 GB + keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB +*/ +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAllocPtr alloc); +void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); +void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); + +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + UInt32 *distances, UInt32 maxLen); + +/* +Conditions: + Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. + Mf_GetPointerToCurrentPos_Func's result must be used only before any other function +*/ + +typedef void (*Mf_Init_Func)(void *object); +typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); +typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); +typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); +typedef void (*Mf_Skip_Func)(void *object, UInt32); + +typedef struct _IMatchFinder +{ + Mf_Init_Func Init; + Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; + Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; + Mf_GetMatches_Func GetMatches; + Mf_Skip_Func Skip; +} IMatchFinder; + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); + +void MatchFinder_Init_LowHash(CMatchFinder *p); +void MatchFinder_Init_HighHash(CMatchFinder *p); +void MatchFinder_Init_3(CMatchFinder *p, int readData); +void MatchFinder_Init(CMatchFinder *p); + +UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); + +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); + +EXTERN_C_END + +#endif diff --git a/cores/saturn/deps/lzma/include/LzHash.h b/cores/saturn/deps/lzma/include/LzHash.h new file mode 100644 index 000000000..e7c942303 --- /dev/null +++ b/cores/saturn/deps/lzma/include/LzHash.h @@ -0,0 +1,57 @@ +/* LzHash.h -- HASH functions for LZ algorithms +2015-04-12 : Igor Pavlov : Public domain */ + +#ifndef __LZ_HASH_H +#define __LZ_HASH_H + +#define kHash2Size (1 << 10) +#define kHash3Size (1 << 16) +#define kHash4Size (1 << 20) + +#define kFix3HashSize (kHash2Size) +#define kFix4HashSize (kHash2Size + kHash3Size) +#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) + +#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8); + +#define HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } + +#define HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; } + +#define HASH5_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + temp ^= (p->crc[cur[3]] << 5); \ + h4 = temp & (kHash4Size - 1); \ + hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; } + +/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ +#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; + + +#define MT_HASH2_CALC \ + h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); + +#define MT_HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } + +#define MT_HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } + +#endif diff --git a/cores/saturn/deps/lzma/include/Lzma86.h b/cores/saturn/deps/lzma/include/Lzma86.h new file mode 100644 index 000000000..bebed5cb7 --- /dev/null +++ b/cores/saturn/deps/lzma/include/Lzma86.h @@ -0,0 +1,111 @@ +/* Lzma86.h -- LZMA + x86 (BCJ) Filter +2013-01-18 : Igor Pavlov : Public domain */ + +#ifndef __LZMA86_H +#define __LZMA86_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define LZMA86_SIZE_OFFSET (1 + 5) +#define LZMA86_HEADER_SIZE (LZMA86_SIZE_OFFSET + 8) + +/* +It's an example for LZMA + x86 Filter use. +You can use .lzma86 extension, if you write that stream to file. +.lzma86 header adds one additional byte to standard .lzma header. +.lzma86 header (14 bytes): + Offset Size Description + 0 1 = 0 - no filter, pure LZMA + = 1 - x86 filter + LZMA + 1 1 lc, lp and pb in encoded form + 2 4 dictSize (little endian) + 6 8 uncompressed size (little endian) + + +Lzma86_Encode +------------- +level - compression level: 0 <= level <= 9, the default value for "level" is 5. + +dictSize - The dictionary size in bytes. The maximum value is + 128 MB = (1 << 27) bytes for 32-bit version + 1 GB = (1 << 30) bytes for 64-bit version + The default value is 16 MB = (1 << 24) bytes, for level = 5. + It's recommended to use the dictionary that is larger than 4 KB and + that can be calculated as (1 << N) or (3 << N) sizes. + For better compression ratio dictSize must be >= inSize. + +filterMode: + SZ_FILTER_NO - no Filter + SZ_FILTER_YES - x86 Filter + SZ_FILTER_AUTO - it tries both alternatives to select best. + Encoder will use 2 or 3 passes: + 2 passes when FILTER_NO provides better compression. + 3 passes when FILTER_YES provides better compression. + +Lzma86Encode allocates Data with MyAlloc functions. +RAM Requirements for compressing: + RamSize = dictionarySize * 11.5 + 6MB + FilterBlockSize + filterMode FilterBlockSize + SZ_FILTER_NO 0 + SZ_FILTER_YES inSize + SZ_FILTER_AUTO inSize + + +Return code: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +enum ESzFilterMode +{ + SZ_FILTER_NO, + SZ_FILTER_YES, + SZ_FILTER_AUTO +}; + +SRes Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen, + int level, UInt32 dictSize, int filterMode); + + +/* +Lzma86_GetUnpackSize: + In: + src - input data + srcLen - input data size + Out: + unpackSize - size of uncompressed stream + Return code: + SZ_OK - OK + SZ_ERROR_INPUT_EOF - Error in headers +*/ + +SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize); + +/* +Lzma86_Decode: + In: + dest - output data + destLen - output data size + src - input data + srcLen - input data size + Out: + destLen - processed output size + srcLen - processed input size + Return code: + SZ_OK - OK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - unsupported file + SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer +*/ + +SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen); + +EXTERN_C_END + +#endif diff --git a/cores/saturn/deps/lzma/include/LzmaDec.h b/cores/saturn/deps/lzma/include/LzmaDec.h new file mode 100644 index 000000000..1f0927ab1 --- /dev/null +++ b/cores/saturn/deps/lzma/include/LzmaDec.h @@ -0,0 +1,234 @@ +/* LzmaDec.h -- LZMA Decoder +2018-04-21 : Igor Pavlov : Public domain */ + +#ifndef __LZMA_DEC_H +#define __LZMA_DEC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* #define _LZMA_PROB32 */ +/* _LZMA_PROB32 can increase the speed on some CPUs, + but memory usage for CLzmaDec::probs will be doubled in that case */ + +typedef +#ifdef _LZMA_PROB32 + UInt32 +#else + UInt16 +#endif + CLzmaProb; + + +/* ---------- LZMA Properties ---------- */ + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaProps +{ + Byte lc; + Byte lp; + Byte pb; + Byte _pad_; + UInt32 dicSize; +} CLzmaProps; + +/* LzmaProps_Decode - decodes properties +Returns: + SZ_OK + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); + + +/* ---------- LZMA Decoder state ---------- */ + +/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. + Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ + +#define LZMA_REQUIRED_INPUT_MAX 20 + +typedef struct +{ + /* Don't change this structure. ASM code can use it. */ + CLzmaProps prop; + CLzmaProb *probs; + CLzmaProb *probs_1664; + Byte *dic; + SizeT dicBufSize; + SizeT dicPos; + const Byte *buf; + UInt32 range; + UInt32 code; + UInt32 processedPos; + UInt32 checkDicSize; + UInt32 reps[4]; + UInt32 state; + UInt32 remainLen; + + UInt32 numProbs; + unsigned tempBufSize; + Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; +} CLzmaDec; + +#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } + +void LzmaDec_Init(CLzmaDec *p); + +/* There are two types of LZMA streams: + - Stream with end mark. That end mark adds about 6 bytes to compressed size. + - Stream without end mark. You must know exact uncompressed size to decompress such stream. */ + +typedef enum +{ + LZMA_FINISH_ANY, /* finish at any point */ + LZMA_FINISH_END /* block must be finished at the end */ +} ELzmaFinishMode; + +/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! + + You must use LZMA_FINISH_END, when you know that current output buffer + covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. + + If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, + and output value of destLen will be less than output buffer size limit. + You can check status result also. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + +typedef enum +{ + LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ + LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ + LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ + LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ +} ELzmaStatus; + +/* ELzmaStatus is used only as output value for function call */ + + +/* ---------- Interfaces ---------- */ + +/* There are 3 levels of interfaces: + 1) Dictionary Interface + 2) Buffer Interface + 3) One Call Interface + You can select any of these interfaces, but don't mix functions from different + groups for same object. */ + + +/* There are two variants to allocate state for Dictionary Interface: + 1) LzmaDec_Allocate / LzmaDec_Free + 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs + You can use variant 2, if you set dictionary buffer manually. + For Buffer Interface you must always use variant 1. + +LzmaDec_Allocate* can return: + SZ_OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc); + +/* ---------- Dictionary Interface ---------- */ + +/* You can use it, if you want to eliminate the overhead for data copying from + dictionary to some other external buffer. + You must work with CLzmaDec variables directly in this interface. + + STEPS: + LzmaDec_Construct() + LzmaDec_Allocate() + for (each new stream) + { + LzmaDec_Init() + while (it needs more decompression) + { + LzmaDec_DecodeToDic() + use data from CLzmaDec::dic and update CLzmaDec::dicPos + } + } + LzmaDec_Free() +*/ + +/* LzmaDec_DecodeToDic + + The decoding to internal dictionary buffer (CLzmaDec::dic). + You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! + +finishMode: + It has meaning only if the decoding reaches output limit (dicLimit). + LZMA_FINISH_ANY - Decode just dicLimit bytes. + LZMA_FINISH_END - Stream must be finished after dicLimit. + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error +*/ + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- Buffer Interface ---------- */ + +/* It's zlib-like interface. + See LzmaDec_DecodeToDic description for information about STEPS and return results, + but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need + to work with CLzmaDec variables manually. + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). +*/ + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- One Call Interface ---------- */ + +/* LzmaDecode + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). +*/ + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc); + +EXTERN_C_END + +#endif diff --git a/cores/saturn/deps/lzma/include/LzmaEnc.h b/cores/saturn/deps/lzma/include/LzmaEnc.h new file mode 100644 index 000000000..9194ee576 --- /dev/null +++ b/cores/saturn/deps/lzma/include/LzmaEnc.h @@ -0,0 +1,76 @@ +/* LzmaEnc.h -- LZMA Encoder +2017-07-27 : Igor Pavlov : Public domain */ + +#ifndef __LZMA_ENC_H +#define __LZMA_ENC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaEncProps +{ + int level; /* 0 <= level <= 9 */ + UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version + (1 << 12) <= dictSize <= (3 << 29) for 64-bit version + default = (1 << 24) */ + int lc; /* 0 <= lc <= 8, default = 3 */ + int lp; /* 0 <= lp <= 4, default = 0 */ + int pb; /* 0 <= pb <= 4, default = 2 */ + int algo; /* 0 - fast, 1 - normal, default = 1 */ + int fb; /* 5 <= fb <= 273, default = 32 */ + int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ + int numHashBytes; /* 2, 3 or 4, default = 4 */ + UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ + unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ + int numThreads; /* 1 or 2, default = 2 */ + + UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. + Encoder uses this value to reduce dictionary size */ +} CLzmaEncProps; + +void LzmaEncProps_Init(CLzmaEncProps *p); +void LzmaEncProps_Normalize(CLzmaEncProps *p); +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); + + +/* ---------- CLzmaEncHandle Interface ---------- */ + +/* LzmaEnc* functions can return the following exit codes: +SRes: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater in props + SZ_ERROR_WRITE - ISeqOutStream write callback error + SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output + SZ_ERROR_PROGRESS - some break from progress callback + SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) +*/ + +typedef void * CLzmaEncHandle; + +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); + +SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); +void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize); +SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); + +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + + +/* ---------- One Call Interface ---------- */ + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + +EXTERN_C_END + +#endif diff --git a/cores/saturn/deps/lzma/include/LzmaLib.h b/cores/saturn/deps/lzma/include/LzmaLib.h new file mode 100644 index 000000000..88fa87d35 --- /dev/null +++ b/cores/saturn/deps/lzma/include/LzmaLib.h @@ -0,0 +1,131 @@ +/* LzmaLib.h -- LZMA library interface +2013-01-18 : Igor Pavlov : Public domain */ + +#ifndef __LZMA_LIB_H +#define __LZMA_LIB_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define MY_STDAPI int MY_STD_CALL + +#define LZMA_PROPS_SIZE 5 + +/* +RAM requirements for LZMA: + for compression: (dictSize * 11.5 + 6 MB) + state_size + for decompression: dictSize + state_size + state_size = (4 + (1.5 << (lc + lp))) KB + by default (lc=3, lp=0), state_size = 16 KB. + +LZMA properties (5 bytes) format + Offset Size Description + 0 1 lc, lp and pb in encoded form. + 1 4 dictSize (little endian). +*/ + +/* +LzmaCompress +------------ + +outPropsSize - + In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. + Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. + + LZMA Encoder will use defult values for any parameter, if it is + -1 for any from: level, loc, lp, pb, fb, numThreads + 0 for dictSize + +level - compression level: 0 <= level <= 9; + + level dictSize algo fb + 0: 16 KB 0 32 + 1: 64 KB 0 32 + 2: 256 KB 0 32 + 3: 1 MB 0 32 + 4: 4 MB 0 32 + 5: 16 MB 1 32 + 6: 32 MB 1 32 + 7+: 64 MB 1 64 + + The default value for "level" is 5. + + algo = 0 means fast method + algo = 1 means normal method + +dictSize - The dictionary size in bytes. The maximum value is + 128 MB = (1 << 27) bytes for 32-bit version + 1 GB = (1 << 30) bytes for 64-bit version + The default value is 16 MB = (1 << 24) bytes. + It's recommended to use the dictionary that is larger than 4 KB and + that can be calculated as (1 << N) or (3 << N) sizes. + +lc - The number of literal context bits (high bits of previous literal). + It can be in the range from 0 to 8. The default value is 3. + Sometimes lc=4 gives the gain for big files. + +lp - The number of literal pos bits (low bits of current position for literals). + It can be in the range from 0 to 4. The default value is 0. + The lp switch is intended for periodical data when the period is equal to 2^lp. + For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's + better to set lc=0, if you change lp switch. + +pb - The number of pos bits (low bits of current position). + It can be in the range from 0 to 4. The default value is 2. + The pb switch is intended for periodical data when the period is equal 2^pb. + +fb - Word size (the number of fast bytes). + It can be in the range from 5 to 273. The default value is 32. + Usually, a big number gives a little bit better compression ratio and + slower compression process. + +numThreads - The number of thereads. 1 or 2. The default value is 2. + Fast mode (algo = 0) can use only 1 thread. + +Out: + destLen - processed output size +Returns: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, + unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */ + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* default = (1 << 24) */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ + ); + +/* +LzmaUncompress +-------------- +In: + dest - output data + destLen - output data size + src - input data + srcLen - input data size +Out: + destLen - processed output size + srcLen - processed input size +Returns: + SZ_OK - OK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation arror + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) +*/ + +MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, + const unsigned char *props, size_t propsSize); + +EXTERN_C_END + +#endif diff --git a/cores/saturn/deps/lzma/include/Precomp.h b/cores/saturn/deps/lzma/include/Precomp.h new file mode 100644 index 000000000..e8ff8b40e --- /dev/null +++ b/cores/saturn/deps/lzma/include/Precomp.h @@ -0,0 +1,10 @@ +/* Precomp.h -- StdAfx +2013-11-12 : Igor Pavlov : Public domain */ + +#ifndef __7Z_PRECOMP_H +#define __7Z_PRECOMP_H + +#include "Compiler.h" +/* #include "7zTypes.h" */ + +#endif diff --git a/cores/saturn/deps/lzma/include/Sort.h b/cores/saturn/deps/lzma/include/Sort.h new file mode 100644 index 000000000..2e2963a23 --- /dev/null +++ b/cores/saturn/deps/lzma/include/Sort.h @@ -0,0 +1,18 @@ +/* Sort.h -- Sort functions +2014-04-05 : Igor Pavlov : Public domain */ + +#ifndef __7Z_SORT_H +#define __7Z_SORT_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +void HeapSort(UInt32 *p, size_t size); +void HeapSort64(UInt64 *p, size_t size); + +/* void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size); */ + +EXTERN_C_END + +#endif diff --git a/cores/saturn/deps/lzma/lzma-history.txt b/cores/saturn/deps/lzma/lzma-history.txt new file mode 100644 index 000000000..48ee74813 --- /dev/null +++ b/cores/saturn/deps/lzma/lzma-history.txt @@ -0,0 +1,446 @@ +HISTORY of the LZMA SDK +----------------------- + +19.00 2019-02-21 +------------------------- +- Encryption strength for 7z archives was increased: + the size of random initialization vector was increased from 64-bit to 128-bit, + and the pseudo-random number generator was improved. +- The bug in 7zIn.c code was fixed. + + +18.06 2018-12-30 +------------------------- +- The speed for LZMA/LZMA2 compressing was increased by 3-10%, + and there are minor changes in compression ratio. +- Some bugs were fixed. +- The bug in 7-Zip 18.02-18.05 was fixed: + There was memory leak in multithreading xz decoder - XzDecMt_Decode(), + if xz stream contains only one block. +- The changes for MSVS compiler makefiles: + - the makefiles now use "PLATFORM" macroname with values (x64, x86, arm64) + instead of "CPU" macroname with values (AMD64, ARM64). + - the makefiles by default now use static version of the run-time library. + + +18.05 2018-04-30 +------------------------- +- The speed for LZMA/LZMA2 compressing was increased + by 8% for fastest/fast compression levels and + by 3% for normal/maximum compression levels. +- Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in + Windows 10 because of some BUG with "Large Pages" in Windows 10. + Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299). +- The BUG was fixed in Lzma2Enc.c + Lzma2Enc_Encode2() function worked incorretly, + if (inStream == NULL) and the number of block threads is more than 1. + + +18.03 beta 2018-03-04 +------------------------- +- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm + for x64 with about 30% higher speed than main version of LZMA decoder written in C. +- The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%. +- 7-Zip now can use multi-threading for 7z/LZMA2 decoding, + if there are multiple independent data chunks in LZMA2 stream. +- 7-Zip now can use multi-threading for xz decoding, + if there are multiple blocks in xz stream. + + +18.01 2019-01-28 +------------------------- +- The BUG in 17.01 - 18.00 beta was fixed: + XzDec.c : random block unpacking and XzUnpacker_IsBlockFinished() + didn't work correctly for xz archives without checksum (CRC). + + +18.00 beta 2019-01-10 +------------------------- +- The BUG in xz encoder was fixed: + There was memory leak of 16 KB for each file compressed with + xz compression method, if additional filter was used. + + +17.01 beta 2017-08-28 +------------------------- +- Minor speed optimization for LZMA2 (xz and 7z) multi-threading compression. + 7-Zip now uses additional memory buffers for multi-block LZMA2 compression. + CPU utilization was slightly improved. +- 7-zip now creates multi-block xz archives by default. Block size can be + specified with -ms[Size]{m|g} switch. +- xz decoder now can unpack random block from multi-block xz archives. +- 7-Zip command line: @listfile now doesn't work after -- switch. + Use -i@listfile before -- switch instead. +- The BUGs were fixed: + 7-Zip 17.00 beta crashed for commands that write anti-item to 7z archive. + + +17.00 beta 2017-04-29 +------------------------- +- NewHandler.h / NewHandler.cpp: + now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900). +- C/7zTypes.h : the names of variables in interface structures were changed (vt). +- Some bugs were fixed. 7-Zip could crash in some cases. +- Some internal changes in code. + + +16.04 2016-10-04 +------------------------- +- The bug was fixed in DllSecur.c. + + +16.03 2016-09-28 +------------------------- +- SFX modules now use some protection against DLL preloading attack. +- Some bugs in 7z code were fixed. + + +16.02 2016-05-21 +------------------------- +- The BUG in 16.00 - 16.01 was fixed: + Split Handler (SplitHandler.cpp) returned incorrect + total size value (kpidSize) for split archives. + + +16.01 2016-05-19 +------------------------- +- Some internal changes to reduce the number of compiler warnings. + + +16.00 2016-05-10 +------------------------- +- Some bugs were fixed. + + +15.12 2015-11-19 +------------------------- +- The BUG in C version of 7z decoder was fixed: + 7zDec.c : SzDecodeLzma2() + 7z decoder could mistakenly report about decoding error for some 7z archives + that use LZMA2 compression method. + The probability to get that mistaken decoding error report was about + one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size). +- The BUG (in 9.26-15.11) in C version of 7z decoder was fixed: + 7zArcIn.c : SzReadHeader2() + 7z decoder worked incorrectly for 7z archives that contain + empty solid blocks, that can be placed to 7z archive, if some file is + unavailable for reading during archive creation. + + +15.09 beta 2015-10-16 +------------------------- +- The BUG in LZMA / LZMA2 encoding code was fixed. + The BUG in LzFind.c::MatchFinder_ReadBlock() function. + If input data size is larger than (4 GiB - dictionary_size), + the following code worked incorrectly: + - LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions + for compressing from memory to memory. + That BUG is not related to LZMA encoder version that works via streams. + - LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if + default value of chunk size (CLzma2EncProps::blockSize) is changed + to value larger than (4 GiB - dictionary_size). + + +9.38 beta 2015-01-03 +------------------------- +- The BUG in 9.31-9.37 was fixed: + IArchiveGetRawProps interface was disabled for 7z archives. +- The BUG in 9.26-9.36 was fixed: + Some code in CPP\7zip\Archive\7z\ worked correctly only under Windows. + + +9.36 beta 2014-12-26 +------------------------- +- The BUG in command line version was fixed: + 7-Zip created temporary archive in current folder during update archive + operation, if -w{Path} switch was not specified. + The fixed 7-Zip creates temporary archive in folder that contains updated archive. +- The BUG in 9.33-9.35 was fixed: + 7-Zip silently ignored file reading errors during 7z or gz archive creation, + and the created archive contained only part of file that was read before error. + The fixed 7-Zip stops archive creation and it reports about error. + + +9.35 beta 2014-12-07 +------------------------- +- 7zr.exe now support AES encryption. +- SFX mudules were added to LZMA SDK +- Some bugs were fixed. + + +9.21 beta 2011-04-11 +------------------------- +- New class FString for file names at file systems. +- Speed optimization in CRC code for big-endian CPUs. +- The BUG in Lzma2Dec.c was fixed: + Lzma2Decode function didn't work. + + +9.18 beta 2010-11-02 +------------------------- +- New small SFX module for installers (SfxSetup). + + +9.12 beta 2010-03-24 +------------------------- +- The BUG in LZMA SDK 9.* was fixed: LZMA2 codec didn't work, + if more than 10 threads were used (or more than 20 threads in some modes). + + +9.11 beta 2010-03-15 +------------------------- +- PPMd compression method support + + +9.09 2009-12-12 +------------------------- +- The bug was fixed: + Utf16_To_Utf8 funstions in UTFConvert.cpp and 7zMain.c + incorrectly converted surrogate characters (the code >= 0x10000) to UTF-8. +- Some bugs were fixed + + +9.06 2009-08-17 +------------------------- +- Some changes in ANSI-C 7z Decoder interfaces. + + +9.04 2009-05-30 +------------------------- +- LZMA2 compression method support +- xz format support + + +4.65 2009-02-03 +------------------------- +- Some minor fixes + + +4.63 2008-12-31 +------------------------- +- Some minor fixes + + +4.61 beta 2008-11-23 +------------------------- +- The bug in ANSI-C LZMA Decoder was fixed: + If encoded stream was corrupted, decoder could access memory + outside of allocated range. +- Some changes in ANSI-C 7z Decoder interfaces. +- LZMA SDK is placed in the public domain. + + +4.60 beta 2008-08-19 +------------------------- +- Some minor fixes. + + +4.59 beta 2008-08-13 +------------------------- +- The bug was fixed: + LZMA Encoder in fast compression mode could access memory outside of + allocated range in some rare cases. + + +4.58 beta 2008-05-05 +------------------------- +- ANSI-C LZMA Decoder was rewritten for speed optimizations. +- ANSI-C LZMA Encoder was included to LZMA SDK. +- C++ LZMA code now is just wrapper over ANSI-C code. + + +4.57 2007-12-12 +------------------------- +- Speed optimizations in Ñ++ LZMA Decoder. +- Small changes for more compatibility with some C/C++ compilers. + + +4.49 beta 2007-07-05 +------------------------- +- .7z ANSI-C Decoder: + - now it supports BCJ and BCJ2 filters + - now it supports files larger than 4 GB. + - now it supports "Last Write Time" field for files. +- C++ code for .7z archives compressing/decompressing from 7-zip + was included to LZMA SDK. + + +4.43 2006-06-04 +------------------------- +- Small changes for more compatibility with some C/C++ compilers. + + +4.42 2006-05-15 +------------------------- +- Small changes in .h files in ANSI-C version. + + +4.39 beta 2006-04-14 +------------------------- +- The bug in versions 4.33b:4.38b was fixed: + C++ version of LZMA encoder could not correctly compress + files larger than 2 GB with HC4 match finder (-mfhc4). + + +4.37 beta 2005-04-06 +------------------------- +- Fixes in C++ code: code could no be compiled if _NO_EXCEPTIONS was defined. + + +4.35 beta 2005-03-02 +------------------------- +- The bug was fixed in C++ version of LZMA Decoder: + If encoded stream was corrupted, decoder could access memory + outside of allocated range. + + +4.34 beta 2006-02-27 +------------------------- +- Compressing speed and memory requirements for compressing were increased +- LZMA now can use only these match finders: HC4, BT2, BT3, BT4 + + +4.32 2005-12-09 +------------------------- +- Java version of LZMA SDK was included + + +4.30 2005-11-20 +------------------------- +- Compression ratio was improved in -a2 mode +- Speed optimizations for compressing in -a2 mode +- -fb switch now supports values up to 273 +- The bug in 7z_C (7zIn.c) was fixed: + It used Alloc/Free functions from different memory pools. + So if program used two memory pools, it worked incorrectly. +- 7z_C: .7z format supporting was improved +- LZMA# SDK (C#.NET version) was included + + +4.27 (Updated) 2005-09-21 +------------------------- +- Some GUIDs/interfaces in C++ were changed. + IStream.h: + ISequentialInStream::Read now works as old ReadPart + ISequentialOutStream::Write now works as old WritePart + + +4.27 2005-08-07 +------------------------- +- The bug in LzmaDecodeSize.c was fixed: + if _LZMA_IN_CB and _LZMA_OUT_READ were defined, + decompressing worked incorrectly. + + +4.26 2005-08-05 +------------------------- +- Fixes in 7z_C code and LzmaTest.c: + previous versions could work incorrectly, + if malloc(0) returns 0 + + +4.23 2005-06-29 +------------------------- +- Small fixes in C++ code + + +4.22 2005-06-10 +------------------------- +- Small fixes + + +4.21 2005-06-08 +------------------------- +- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed +- New additional version of ANSI-C LZMA Decoder with zlib-like interface: + - LzmaStateDecode.h + - LzmaStateDecode.c + - LzmaStateTest.c +- ANSI-C LZMA Decoder now can decompress files larger than 4 GB + + +4.17 2005-04-18 +------------------------- +- New example for RAM->RAM compressing/decompressing: + LZMA + BCJ (filter for x86 code): + - LzmaRam.h + - LzmaRam.cpp + - LzmaRamDecode.h + - LzmaRamDecode.c + - -f86 switch for lzma.exe + + +4.16 2005-03-29 +------------------------- +- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder): + If _LZMA_OUT_READ was defined, and if encoded stream was corrupted, + decoder could access memory outside of allocated range. +- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster). + Old version of LZMA Decoder now is in file LzmaDecodeSize.c. + LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c +- Small speed optimization in LZMA C++ code +- filter for SPARC's code was added +- Simplified version of .7z ANSI-C Decoder was included + + +4.06 2004-09-05 +------------------------- +- The bug in v4.05 was fixed: + LZMA-Encoder didn't release output stream in some cases. + + +4.05 2004-08-25 +------------------------- +- Source code of filters for x86, IA-64, ARM, ARM-Thumb + and PowerPC code was included to SDK +- Some internal minor changes + + +4.04 2004-07-28 +------------------------- +- More compatibility with some C++ compilers + + +4.03 2004-06-18 +------------------------- +- "Benchmark" command was added. It measures compressing + and decompressing speed and shows rating values. + Also it checks hardware errors. + + +4.02 2004-06-10 +------------------------- +- C++ LZMA Encoder/Decoder code now is more portable + and it can be compiled by GCC on Linux. + + +4.01 2004-02-15 +------------------------- +- Some detection of data corruption was enabled. + LzmaDecode.c / RangeDecoderReadByte + ..... + { + rd->ExtraBytes = 1; + return 0xFF; + } + + +4.00 2004-02-13 +------------------------- +- Original version of LZMA SDK + + + +HISTORY of the LZMA +------------------- + 2001-2008: Improvements to LZMA compressing/decompressing code, + keeping compatibility with original LZMA format + 1996-2001: Development of LZMA compression format + + Some milestones: + + 2001-08-30: LZMA compression was added to 7-Zip + 1999-01-02: First version of 7-Zip was released + + +End of document diff --git a/cores/saturn/deps/lzma/lzma.txt b/cores/saturn/deps/lzma/lzma.txt new file mode 100644 index 000000000..a65988fe0 --- /dev/null +++ b/cores/saturn/deps/lzma/lzma.txt @@ -0,0 +1,328 @@ +LZMA compression +---------------- +Version: 9.35 + +This file describes LZMA encoding and decoding functions written in C language. + +LZMA is an improved version of famous LZ77 compression algorithm. +It was improved in way of maximum increasing of compression ratio, +keeping high decompression speed and low memory requirements for +decompressing. + +Note: you can read also LZMA Specification (lzma-specification.txt from LZMA SDK) + +Also you can look source code for LZMA encoding and decoding: + C/Util/Lzma/LzmaUtil.c + + +LZMA compressed file format +--------------------------- +Offset Size Description + 0 1 Special LZMA properties (lc,lp, pb in encoded form) + 1 4 Dictionary size (little endian) + 5 8 Uncompressed size (little endian). -1 means unknown size + 13 Compressed data + + + +ANSI-C LZMA Decoder +~~~~~~~~~~~~~~~~~~~ + +Please note that interfaces for ANSI-C code were changed in LZMA SDK 4.58. +If you want to use old interfaces you can download previous version of LZMA SDK +from sourceforge.net site. + +To use ANSI-C LZMA Decoder you need the following files: +1) LzmaDec.h + LzmaDec.c + 7zTypes.h + Precomp.h + Compiler.h + +Look example code: + C/Util/Lzma/LzmaUtil.c + + +Memory requirements for LZMA decoding +------------------------------------- + +Stack usage of LZMA decoding function for local variables is not +larger than 200-400 bytes. + +LZMA Decoder uses dictionary buffer and internal state structure. +Internal state structure consumes + state_size = (4 + (1.5 << (lc + lp))) KB +by default (lc=3, lp=0), state_size = 16 KB. + + +How To decompress data +---------------------- + +LZMA Decoder (ANSI-C version) now supports 2 interfaces: +1) Single-call Decompressing +2) Multi-call State Decompressing (zlib-like interface) + +You must use external allocator: +Example: +void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); } +void SzFree(void *p, void *address) { p = p; free(address); } +ISzAlloc alloc = { SzAlloc, SzFree }; + +You can use p = p; operator to disable compiler warnings. + + +Single-call Decompressing +------------------------- +When to use: RAM->RAM decompressing +Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h +Compile defines: no defines +Memory Requirements: + - Input buffer: compressed size + - Output buffer: uncompressed size + - LZMA Internal Structures: state_size (16 KB for default settings) + +Interface: + int LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAlloc *alloc); + In: + dest - output data + destLen - output data size + src - input data + srcLen - input data size + propData - LZMA properties (5 bytes) + propSize - size of propData buffer (5 bytes) + finishMode - It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). + You can use LZMA_FINISH_END, when you know that + current output buffer covers last bytes of stream. + alloc - Memory allocator. + + Out: + destLen - processed output size + srcLen - processed input size + + Output: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). + + If LZMA decoder sees end_marker before reaching output limit, it returns OK result, + and output value of destLen will be less than output buffer size limit. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + + +Multi-call State Decompressing (zlib-like interface) +---------------------------------------------------- + +When to use: file->file decompressing +Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h + +Memory Requirements: + - Buffer for input stream: any size (for example, 16 KB) + - Buffer for output stream: any size (for example, 16 KB) + - LZMA Internal Structures: state_size (16 KB for default settings) + - LZMA dictionary (dictionary size is encoded in LZMA properties header) + +1) read LZMA properties (5 bytes) and uncompressed size (8 bytes, little-endian) to header: + unsigned char header[LZMA_PROPS_SIZE + 8]; + ReadFile(inFile, header, sizeof(header) + +2) Allocate CLzmaDec structures (state + dictionary) using LZMA properties + + CLzmaDec state; + LzmaDec_Constr(&state); + res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc); + if (res != SZ_OK) + return res; + +3) Init LzmaDec structure before any new LZMA stream. And call LzmaDec_DecodeToBuf in loop + + LzmaDec_Init(&state); + for (;;) + { + ... + int res = LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode); + ... + } + + +4) Free all allocated structures + LzmaDec_Free(&state, &g_Alloc); + +Look example code: + C/Util/Lzma/LzmaUtil.c + + +How To compress data +-------------------- + +Compile files: + 7zTypes.h + Threads.h + LzmaEnc.h + LzmaEnc.c + LzFind.h + LzFind.c + LzFindMt.h + LzFindMt.c + LzHash.h + +Memory Requirements: + - (dictSize * 11.5 + 6 MB) + state_size + +Lzma Encoder can use two memory allocators: +1) alloc - for small arrays. +2) allocBig - for big arrays. + +For example, you can use Large RAM Pages (2 MB) in allocBig allocator for +better compression speed. Note that Windows has bad implementation for +Large RAM Pages. +It's OK to use same allocator for alloc and allocBig. + + +Single-call Compression with callbacks +-------------------------------------- + +Look example code: + C/Util/Lzma/LzmaUtil.c + +When to use: file->file compressing + +1) you must implement callback structures for interfaces: +ISeqInStream +ISeqOutStream +ICompressProgress +ISzAlloc + +static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); } +static void SzFree(void *p, void *address) { p = p; MyFree(address); } +static ISzAlloc g_Alloc = { SzAlloc, SzFree }; + + CFileSeqInStream inStream; + CFileSeqOutStream outStream; + + inStream.funcTable.Read = MyRead; + inStream.file = inFile; + outStream.funcTable.Write = MyWrite; + outStream.file = outFile; + + +2) Create CLzmaEncHandle object; + + CLzmaEncHandle enc; + + enc = LzmaEnc_Create(&g_Alloc); + if (enc == 0) + return SZ_ERROR_MEM; + + +3) initialize CLzmaEncProps properties; + + LzmaEncProps_Init(&props); + + Then you can change some properties in that structure. + +4) Send LZMA properties to LZMA Encoder + + res = LzmaEnc_SetProps(enc, &props); + +5) Write encoded properties to header + + Byte header[LZMA_PROPS_SIZE + 8]; + size_t headerSize = LZMA_PROPS_SIZE; + UInt64 fileSize; + int i; + + res = LzmaEnc_WriteProperties(enc, header, &headerSize); + fileSize = MyGetFileLength(inFile); + for (i = 0; i < 8; i++) + header[headerSize++] = (Byte)(fileSize >> (8 * i)); + MyWriteFileAndCheck(outFile, header, headerSize) + +6) Call encoding function: + res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable, + NULL, &g_Alloc, &g_Alloc); + +7) Destroy LZMA Encoder Object + LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc); + + +If callback function return some error code, LzmaEnc_Encode also returns that code +or it can return the code like SZ_ERROR_READ, SZ_ERROR_WRITE or SZ_ERROR_PROGRESS. + + +Single-call RAM->RAM Compression +-------------------------------- + +Single-call RAM->RAM Compression is similar to Compression with callbacks, +but you provide pointers to buffers instead of pointers to stream callbacks: + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + +Return code: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) + + + +Defines +------- + +_LZMA_SIZE_OPT - Enable some optimizations in LZMA Decoder to get smaller executable code. + +_LZMA_PROB32 - It can increase the speed on some 32-bit CPUs, but memory usage for + some structures will be doubled in that case. + +_LZMA_UINT32_IS_ULONG - Define it if int is 16-bit on your compiler and long is 32-bit. + +_LZMA_NO_SYSTEM_SIZE_T - Define it if you don't want to use size_t type. + + +_7ZIP_PPMD_SUPPPORT - Define it if you don't want to support PPMD method in AMSI-C .7z decoder. + + +C++ LZMA Encoder/Decoder +~~~~~~~~~~~~~~~~~~~~~~~~ +C++ LZMA code use COM-like interfaces. So if you want to use it, +you can study basics of COM/OLE. +C++ LZMA code is just wrapper over ANSI-C code. + + +C++ Notes +~~~~~~~~~~~~~~~~~~~~~~~~ +If you use some C++ code folders in 7-Zip (for example, C++ code for .7z handling), +you must check that you correctly work with "new" operator. +7-Zip can be compiled with MSVC 6.0 that doesn't throw "exception" from "new" operator. +So 7-Zip uses "CPP\Common\NewHandler.cpp" that redefines "new" operator: +operator new(size_t size) +{ + void *p = ::malloc(size); + if (p == 0) + throw CNewException(); + return p; +} +If you use MSCV that throws exception for "new" operator, you can compile without +"NewHandler.cpp". So standard exception will be used. Actually some code of +7-Zip catches any exception in internal code and converts it to HRESULT code. +So you don't need to catch CNewException, if you call COM interfaces of 7-Zip. + +--- + +http://www.7-zip.org +http://www.7-zip.org/sdk.html +http://www.7-zip.org/support.html diff --git a/cores/saturn/deps/lzma/lzma.vcxproj b/cores/saturn/deps/lzma/lzma.vcxproj new file mode 100644 index 000000000..08bcb149e --- /dev/null +++ b/cores/saturn/deps/lzma/lzma.vcxproj @@ -0,0 +1,543 @@ + + + + + DebugFast + ARM64 + + + DebugFast + Win32 + + + DebugFast + x64 + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + ReleaseLTCG + ARM64 + + + ReleaseLTCG + Win32 + + + ReleaseLTCG + x64 + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + + + {DD944834-7899-4C1C-A4C1-064B5009D239} + Win32Proj + lzma + 10.0 + + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + true + v142 + NotSet + + + StaticLibrary + false + v142 + true + NotSet + false + + + StaticLibrary + false + v142 + true + NotSet + false + + + StaticLibrary + false + v142 + true + NotSet + false + + + StaticLibrary + false + v142 + true + NotSet + false + + + StaticLibrary + false + v142 + true + NotSet + false + + + StaticLibrary + false + v142 + true + NotSet + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + true + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + + + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + $(ProjectName)-$(Platform)-$(Configuration) + false + $(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\ + + + + + + Level2 + Disabled + _7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + $(ProjectDir)include;%(AdditionalIncludeDirectories) + false + stdcpp17 + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + + + + + + + Level2 + Disabled + _7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + $(ProjectDir)include;%(AdditionalIncludeDirectories) + false + stdcpp17 + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + + + + + + + Level2 + Disabled + _7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + $(ProjectDir)include;%(AdditionalIncludeDirectories) + false + stdcpp17 + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + + + + + + + Level2 + Disabled + _7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + $(ProjectDir)include;%(AdditionalIncludeDirectories) + Default + false + stdcpp17 + false + true + OnlyExplicitInline + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + + + + + + + Level2 + Disabled + _7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + $(ProjectDir)include;%(AdditionalIncludeDirectories) + Default + false + stdcpp17 + false + true + OnlyExplicitInline + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + + + + + + + Level2 + Disabled + _7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + true + ProgramDatabase + $(ProjectDir)include;%(AdditionalIncludeDirectories) + Default + false + stdcpp17 + false + true + OnlyExplicitInline + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + + + + + Level2 + + + MaxSpeed + true + _7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir)include;%(AdditionalIncludeDirectories) + false + stdcpp17 + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + + + + + Level2 + + + MaxSpeed + true + _7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir)include;%(AdditionalIncludeDirectories) + true + stdcpp17 + true + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + + + + + Level2 + + + MaxSpeed + true + _7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir)include;%(AdditionalIncludeDirectories) + false + stdcpp17 + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + + + + + Level2 + + + MaxSpeed + true + _7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir)include;%(AdditionalIncludeDirectories) + false + stdcpp17 + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + + + + + Level2 + + + MaxSpeed + true + _7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir)include;%(AdditionalIncludeDirectories) + true + stdcpp17 + true + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + + + + + Level2 + + + MaxSpeed + true + _7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + $(ProjectDir)include;%(AdditionalIncludeDirectories) + true + stdcpp17 + true + true + true + /Zo /utf-8 %(AdditionalOptions) + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/cores/saturn/deps/lzma/lzma.vcxproj.filters b/cores/saturn/deps/lzma/lzma.vcxproj.filters new file mode 100644 index 000000000..8725b394c --- /dev/null +++ b/cores/saturn/deps/lzma/lzma.vcxproj.filters @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cores/saturn/deps/lzma/src/Alloc.c b/cores/saturn/deps/lzma/src/Alloc.c new file mode 100644 index 000000000..6bbe8c634 --- /dev/null +++ b/cores/saturn/deps/lzma/src/Alloc.c @@ -0,0 +1,307 @@ +/* Alloc.c -- Memory allocation functions +2018-04-27 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#ifdef _WIN32 +#include +#endif +#include + +#include "Alloc.h" + +void *MyAlloc(size_t size) +{ + if (size == 0) + return NULL; + #ifdef _SZ_ALLOC_DEBUG + { + void *p = malloc(size); + return p; + } + #else + return malloc(size); + #endif +} + +void MyFree(void *address) +{ + free(address); +} + +#ifdef _WIN32 + +void *MidAlloc(size_t size) +{ + if (size == 0) + return NULL; + + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); +} + +void MidFree(void *address) +{ + if (!address) + return; + VirtualFree(address, 0, MEM_RELEASE); +} + +#ifndef MEM_LARGE_PAGES +#undef _7ZIP_LARGE_PAGES +#endif + +#ifdef _7ZIP_LARGE_PAGES +SIZE_T g_LargePageSize = 0; +typedef SIZE_T (WINAPI *GetLargePageMinimumP)(); +#endif + +void SetLargePageSize() +{ + #ifdef _7ZIP_LARGE_PAGES + SIZE_T size; + GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) + GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); + if (!largePageMinimum) + return; + size = largePageMinimum(); + if (size == 0 || (size & (size - 1)) != 0) + return; + g_LargePageSize = size; + #endif +} + + +void *BigAlloc(size_t size) +{ + if (size == 0) + return NULL; + + #ifdef _7ZIP_LARGE_PAGES + { + SIZE_T ps = g_LargePageSize; + if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) + { + size_t size2; + ps--; + size2 = (size + ps) & ~ps; + if (size2 >= size) + { + void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + if (res) + return res; + } + } + } + #endif + + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); +} + +void BigFree(void *address) +{ + if (!address) + return; + VirtualFree(address, 0, MEM_RELEASE); +} + +#endif + + +static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); } +static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); } +const ISzAlloc g_Alloc = { SzAlloc, SzFree }; + +static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); } +static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); } +const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; + +static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } +static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); } +const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; + + +/* + uintptr_t : C99 (optional) + : unsupported in VS6 +*/ + +#ifdef _WIN32 + typedef UINT_PTR UIntPtr; +#else + /* + typedef uintptr_t UIntPtr; + */ + typedef ptrdiff_t UIntPtr; +#endif + + +#define ADJUST_ALLOC_SIZE 0 +/* +#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1) +*/ +/* + Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if + MyAlloc() can return address that is NOT multiple of sizeof(void *). +*/ + + +/* +#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1)))) +*/ +#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1)))) + +#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align) + + +#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32) + #define USE_posix_memalign +#endif + +/* + This posix_memalign() is for test purposes only. + We also need special Free() function instead of free(), + if this posix_memalign() is used. +*/ + +/* +static int posix_memalign(void **ptr, size_t align, size_t size) +{ + size_t newSize = size + align; + void *p; + void *pAligned; + *ptr = NULL; + if (newSize < size) + return 12; // ENOMEM + p = MyAlloc(newSize); + if (!p) + return 12; // ENOMEM + pAligned = MY_ALIGN_PTR_UP_PLUS(p, align); + ((void **)pAligned)[-1] = p; + *ptr = pAligned; + return 0; +} +*/ + +/* + ALLOC_ALIGN_SIZE >= sizeof(void *) + ALLOC_ALIGN_SIZE >= cache_line_size +*/ + +#define ALLOC_ALIGN_SIZE ((size_t)1 << 7) + +static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) +{ + #ifndef USE_posix_memalign + + void *p; + void *pAligned; + size_t newSize; + UNUSED_VAR(pp); + + /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned + block to prevent cache line sharing with another allocated blocks */ + + newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE; + if (newSize < size) + return NULL; + + p = MyAlloc(newSize); + + if (!p) + return NULL; + pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE); + + ((void **)pAligned)[-1] = p; + + return pAligned; + + #else + + void *p; + UNUSED_VAR(pp); + if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) + return NULL; + + return p; + + #endif +} + + +static void SzAlignedFree(ISzAllocPtr pp, void *address) +{ + UNUSED_VAR(pp); + #ifndef USE_posix_memalign + if (address) + MyFree(((void **)address)[-1]); + #else + free(address); + #endif +} + + +const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree }; + + + +#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *)) + +/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */ +#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1] +/* +#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1] +*/ + +static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) +{ + CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); + void *adr; + void *pAligned; + size_t newSize; + size_t extra; + size_t alignSize = (size_t)1 << p->numAlignBits; + + if (alignSize < sizeof(void *)) + alignSize = sizeof(void *); + + if (p->offset >= alignSize) + return NULL; + + /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned + block to prevent cache line sharing with another allocated blocks */ + extra = p->offset & (sizeof(void *) - 1); + newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE; + if (newSize < size) + return NULL; + + adr = ISzAlloc_Alloc(p->baseAlloc, newSize); + + if (!adr) + return NULL; + + pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr + + alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset; + + REAL_BLOCK_PTR_VAR(pAligned) = adr; + + return pAligned; +} + + +static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address) +{ + if (address) + { + CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); + ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address)); + } +} + + +void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p) +{ + p->vt.Alloc = AlignOffsetAlloc_Alloc; + p->vt.Free = AlignOffsetAlloc_Free; +} diff --git a/cores/saturn/deps/lzma/src/Bra86.c b/cores/saturn/deps/lzma/src/Bra86.c new file mode 100644 index 000000000..93ed4d762 --- /dev/null +++ b/cores/saturn/deps/lzma/src/Bra86.c @@ -0,0 +1,82 @@ +/* Bra86.c -- Converter for x86 code (BCJ) +2017-04-03 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Bra.h" + +#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0) + +SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding) +{ + SizeT pos = 0; + UInt32 mask = *state & 7; + if (size < 5) + return 0; + size -= 4; + ip += 5; + + for (;;) + { + Byte *p = data + pos; + const Byte *limit = data + size; + for (; p < limit; p++) + if ((*p & 0xFE) == 0xE8) + break; + + { + SizeT d = (SizeT)(p - data - pos); + pos = (SizeT)(p - data); + if (p >= limit) + { + *state = (d > 2 ? 0 : mask >> (unsigned)d); + return pos; + } + if (d > 2) + mask = 0; + else + { + mask >>= (unsigned)d; + if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1]))) + { + mask = (mask >> 1) | 4; + pos++; + continue; + } + } + } + + if (Test86MSByte(p[4])) + { + UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); + UInt32 cur = ip + (UInt32)pos; + pos += 5; + if (encoding) + v += cur; + else + v -= cur; + if (mask != 0) + { + unsigned sh = (mask & 6) << 2; + if (Test86MSByte((Byte)(v >> sh))) + { + v ^= (((UInt32)0x100 << sh) - 1); + if (encoding) + v += cur; + else + v -= cur; + } + mask = 0; + } + p[1] = (Byte)v; + p[2] = (Byte)(v >> 8); + p[3] = (Byte)(v >> 16); + p[4] = (Byte)(0 - ((v >> 24) & 1)); + } + else + { + mask = (mask >> 1) | 4; + pos++; + } + } +} diff --git a/cores/saturn/deps/lzma/src/BraIA64.c b/cores/saturn/deps/lzma/src/BraIA64.c new file mode 100644 index 000000000..d1dbc62c5 --- /dev/null +++ b/cores/saturn/deps/lzma/src/BraIA64.c @@ -0,0 +1,53 @@ +/* BraIA64.c -- Converter for IA-64 code +2017-01-26 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" +#include "Bra.h" + +SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +{ + SizeT i; + if (size < 16) + return 0; + size -= 16; + i = 0; + do + { + unsigned m = ((UInt32)0x334B0000 >> (data[i] & 0x1E)) & 3; + if (m) + { + m++; + do + { + Byte *p = data + (i + (size_t)m * 5 - 8); + if (((p[3] >> m) & 15) == 5 + && (((p[-1] | ((UInt32)p[0] << 8)) >> m) & 0x70) == 0) + { + unsigned raw = GetUi32(p); + unsigned v = raw >> m; + v = (v & 0xFFFFF) | ((v & (1 << 23)) >> 3); + + v <<= 4; + if (encoding) + v += ip + (UInt32)i; + else + v -= ip + (UInt32)i; + v >>= 4; + + v &= 0x1FFFFF; + v += 0x700000; + v &= 0x8FFFFF; + raw &= ~((UInt32)0x8FFFFF << m); + raw |= (v << m); + SetUi32(p, raw); + } + } + while (++m <= 4); + } + i += 16; + } + while (i <= size); + return i; +} diff --git a/cores/saturn/deps/lzma/src/CpuArch.c b/cores/saturn/deps/lzma/src/CpuArch.c new file mode 100644 index 000000000..02e482e08 --- /dev/null +++ b/cores/saturn/deps/lzma/src/CpuArch.c @@ -0,0 +1,218 @@ +/* CpuArch.c -- CPU specific code +2018-02-18: Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 + +#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__) +#define USE_ASM +#endif + +#if !defined(USE_ASM) && _MSC_VER >= 1500 +#include +#endif + +#if defined(USE_ASM) && !defined(MY_CPU_AMD64) +static UInt32 CheckFlag(UInt32 flag) +{ + #ifdef _MSC_VER + __asm pushfd; + __asm pop EAX; + __asm mov EDX, EAX; + __asm xor EAX, flag; + __asm push EAX; + __asm popfd; + __asm pushfd; + __asm pop EAX; + __asm xor EAX, EDX; + __asm push EDX; + __asm popfd; + __asm and flag, EAX; + #else + __asm__ __volatile__ ( + "pushf\n\t" + "pop %%EAX\n\t" + "movl %%EAX,%%EDX\n\t" + "xorl %0,%%EAX\n\t" + "push %%EAX\n\t" + "popf\n\t" + "pushf\n\t" + "pop %%EAX\n\t" + "xorl %%EDX,%%EAX\n\t" + "push %%EDX\n\t" + "popf\n\t" + "andl %%EAX, %0\n\t": + "=c" (flag) : "c" (flag) : + "%eax", "%edx"); + #endif + return flag; +} +#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False; +#else +#define CHECK_CPUID_IS_SUPPORTED +#endif + +void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) +{ + #ifdef USE_ASM + + #ifdef _MSC_VER + + UInt32 a2, b2, c2, d2; + __asm xor EBX, EBX; + __asm xor ECX, ECX; + __asm xor EDX, EDX; + __asm mov EAX, function; + __asm cpuid; + __asm mov a2, EAX; + __asm mov b2, EBX; + __asm mov c2, ECX; + __asm mov d2, EDX; + + *a = a2; + *b = b2; + *c = c2; + *d = d2; + + #else + + __asm__ __volatile__ ( + #if defined(MY_CPU_AMD64) && defined(__PIC__) + "mov %%rbx, %%rdi;" + "cpuid;" + "xchg %%rbx, %%rdi;" + : "=a" (*a) , + "=D" (*b) , + #elif defined(MY_CPU_X86) && defined(__PIC__) + "mov %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "=a" (*a) , + "=D" (*b) , + #else + "cpuid" + : "=a" (*a) , + "=b" (*b) , + #endif + "=c" (*c) , + "=d" (*d) + : "0" (function)) ; + + #endif + + #else + + int CPUInfo[4]; + __cpuid(CPUInfo, function); + *a = CPUInfo[0]; + *b = CPUInfo[1]; + *c = CPUInfo[2]; + *d = CPUInfo[3]; + + #endif +} + +BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p) +{ + CHECK_CPUID_IS_SUPPORTED + MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]); + MyCPUID(1, &p->ver, &p->b, &p->c, &p->d); + return True; +} + +static const UInt32 kVendors[][3] = +{ + { 0x756E6547, 0x49656E69, 0x6C65746E}, + { 0x68747541, 0x69746E65, 0x444D4163}, + { 0x746E6543, 0x48727561, 0x736C7561} +}; + +int x86cpuid_GetFirm(const Cx86cpuid *p) +{ + unsigned i; + for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++) + { + const UInt32 *v = kVendors[i]; + if (v[0] == p->vendor[0] && + v[1] == p->vendor[1] && + v[2] == p->vendor[2]) + return (int)i; + } + return -1; +} + +BoolInt CPU_Is_InOrder() +{ + Cx86cpuid p; + int firm; + UInt32 family, model; + if (!x86cpuid_CheckAndRead(&p)) + return True; + + family = x86cpuid_GetFamily(p.ver); + model = x86cpuid_GetModel(p.ver); + + firm = x86cpuid_GetFirm(&p); + + switch (firm) + { + case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( + /* In-Order Atom CPU */ + model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */ + || model == 0x26 /* 45 nm, Z6xx */ + || model == 0x27 /* 32 nm, Z2460 */ + || model == 0x35 /* 32 nm, Z2760 */ + || model == 0x36 /* 32 nm, N2xxx, D2xxx */ + ))); + case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); + case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); + } + return True; +} + +#if !defined(MY_CPU_AMD64) && defined(_WIN32) +#include +static BoolInt CPU_Sys_Is_SSE_Supported() +{ + OSVERSIONINFO vi; + vi.dwOSVersionInfoSize = sizeof(vi); + if (!GetVersionEx(&vi)) + return False; + return (vi.dwMajorVersion >= 5); +} +#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; +#else +#define CHECK_SYS_SSE_SUPPORT +#endif + +BoolInt CPU_Is_Aes_Supported() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_CheckAndRead(&p)) + return False; + return (p.c >> 25) & 1; +} + +BoolInt CPU_IsSupported_PageGB() +{ + Cx86cpuid cpuid; + if (!x86cpuid_CheckAndRead(&cpuid)) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]); + if (d[0] < 0x80000001) + return False; + } + { + UInt32 d[4] = { 0 }; + MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]); + return (d[3] >> 26) & 1; + } +} + +#endif diff --git a/cores/saturn/deps/lzma/src/Delta.c b/cores/saturn/deps/lzma/src/Delta.c new file mode 100644 index 000000000..e3edd21ed --- /dev/null +++ b/cores/saturn/deps/lzma/src/Delta.c @@ -0,0 +1,64 @@ +/* Delta.c -- Delta converter +2009-05-26 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Delta.h" + +void Delta_Init(Byte *state) +{ + unsigned i; + for (i = 0; i < DELTA_STATE_SIZE; i++) + state[i] = 0; +} + +static void MyMemCpy(Byte *dest, const Byte *src, unsigned size) +{ + unsigned i; + for (i = 0; i < size; i++) + dest[i] = src[i]; +} + +void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size) +{ + Byte buf[DELTA_STATE_SIZE]; + unsigned j = 0; + MyMemCpy(buf, state, delta); + { + SizeT i; + for (i = 0; i < size;) + { + for (j = 0; j < delta && i < size; i++, j++) + { + Byte b = data[i]; + data[i] = (Byte)(b - buf[j]); + buf[j] = b; + } + } + } + if (j == delta) + j = 0; + MyMemCpy(state, buf + j, delta - j); + MyMemCpy(state + delta - j, buf, j); +} + +void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size) +{ + Byte buf[DELTA_STATE_SIZE]; + unsigned j = 0; + MyMemCpy(buf, state, delta); + { + SizeT i; + for (i = 0; i < size;) + { + for (j = 0; j < delta && i < size; i++, j++) + { + buf[j] = data[i] = (Byte)(buf[j] + data[i]); + } + } + } + if (j == delta) + j = 0; + MyMemCpy(state, buf + j, delta - j); + MyMemCpy(state + delta - j, buf, j); +} diff --git a/cores/saturn/deps/lzma/src/LzFind.c b/cores/saturn/deps/lzma/src/LzFind.c new file mode 100644 index 000000000..df55e86c1 --- /dev/null +++ b/cores/saturn/deps/lzma/src/LzFind.c @@ -0,0 +1,1127 @@ +/* LzFind.c -- Match finder for LZ algorithms +2018-07-08 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "LzFind.h" +#include "LzHash.h" + +#define kEmptyHashValue 0 +#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) +#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ +#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1)) +#define kMaxHistorySize ((UInt32)7 << 29) + +#define kStartMaxLen 3 + +static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) +{ + if (!p->directInput) + { + ISzAlloc_Free(alloc, p->bufferBase); + p->bufferBase = NULL; + } +} + +/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ + +static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc) +{ + UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; + if (p->directInput) + { + p->blockSize = blockSize; + return 1; + } + if (!p->bufferBase || p->blockSize != blockSize) + { + LzInWindow_Free(p, alloc); + p->blockSize = blockSize; + p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize); + } + return (p->bufferBase != NULL); +} + +Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } + +UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } + +void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) +{ + p->posLimit -= subValue; + p->pos -= subValue; + p->streamPos -= subValue; +} + +static void MatchFinder_ReadBlock(CMatchFinder *p) +{ + if (p->streamEndWasReached || p->result != SZ_OK) + return; + + /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */ + + if (p->directInput) + { + UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos); + if (curSize > p->directInputRem) + curSize = (UInt32)p->directInputRem; + p->directInputRem -= curSize; + p->streamPos += curSize; + if (p->directInputRem == 0) + p->streamEndWasReached = 1; + return; + } + + for (;;) + { + Byte *dest = p->buffer + (p->streamPos - p->pos); + size_t size = (p->bufferBase + p->blockSize - dest); + if (size == 0) + return; + + p->result = ISeqInStream_Read(p->stream, dest, &size); + if (p->result != SZ_OK) + return; + if (size == 0) + { + p->streamEndWasReached = 1; + return; + } + p->streamPos += (UInt32)size; + if (p->streamPos - p->pos > p->keepSizeAfter) + return; + } +} + +void MatchFinder_MoveBlock(CMatchFinder *p) +{ + memmove(p->bufferBase, + p->buffer - p->keepSizeBefore, + (size_t)(p->streamPos - p->pos) + p->keepSizeBefore); + p->buffer = p->bufferBase + p->keepSizeBefore; +} + +int MatchFinder_NeedMove(CMatchFinder *p) +{ + if (p->directInput) + return 0; + /* if (p->streamEndWasReached) return 0; */ + return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); +} + +void MatchFinder_ReadIfRequired(CMatchFinder *p) +{ + if (p->streamEndWasReached) + return; + if (p->keepSizeAfter >= p->streamPos - p->pos) + MatchFinder_ReadBlock(p); +} + +static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) +{ + if (MatchFinder_NeedMove(p)) + MatchFinder_MoveBlock(p); + MatchFinder_ReadBlock(p); +} + +static void MatchFinder_SetDefaultSettings(CMatchFinder *p) +{ + p->cutValue = 32; + p->btMode = 1; + p->numHashBytes = 4; + p->bigHash = 0; +} + +#define kCrcPoly 0xEDB88320 + +void MatchFinder_Construct(CMatchFinder *p) +{ + unsigned i; + p->bufferBase = NULL; + p->directInput = 0; + p->hash = NULL; + p->expectedDataSize = (UInt64)(Int64)-1; + MatchFinder_SetDefaultSettings(p); + + for (i = 0; i < 256; i++) + { + UInt32 r = (UInt32)i; + unsigned j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); + p->crc[i] = r; + } +} + +static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->hash); + p->hash = NULL; +} + +void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) +{ + MatchFinder_FreeThisClassMemory(p, alloc); + LzInWindow_Free(p, alloc); +} + +static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) +{ + size_t sizeInBytes = (size_t)num * sizeof(CLzRef); + if (sizeInBytes / sizeof(CLzRef) != num) + return NULL; + return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); +} + +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAllocPtr alloc) +{ + UInt32 sizeReserv; + + if (historySize > kMaxHistorySize) + { + MatchFinder_Free(p, alloc); + return 0; + } + + sizeReserv = historySize >> 1; + if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3; + else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2; + + sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); + + p->keepSizeBefore = historySize + keepAddBufferBefore + 1; + p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; + + /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ + + if (LzInWindow_Create(p, sizeReserv, alloc)) + { + UInt32 newCyclicBufferSize = historySize + 1; + UInt32 hs; + p->matchMaxLen = matchMaxLen; + { + p->fixedHashSize = 0; + if (p->numHashBytes == 2) + hs = (1 << 16) - 1; + else + { + hs = historySize; + if (hs > p->expectedDataSize) + hs = (UInt32)p->expectedDataSize; + if (hs != 0) + hs--; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + hs >>= 1; + hs |= 0xFFFF; /* don't change it! It's required for Deflate */ + if (hs > (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + else + hs >>= 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ + } + } + p->hashMask = hs; + hs++; + if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; + if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; + if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; + hs += p->fixedHashSize; + } + + { + size_t newSize; + size_t numSons; + p->historySize = historySize; + p->hashSizeSum = hs; + p->cyclicBufferSize = newCyclicBufferSize; + + numSons = newCyclicBufferSize; + if (p->btMode) + numSons <<= 1; + newSize = hs + numSons; + + if (p->hash && p->numRefs == newSize) + return 1; + + MatchFinder_FreeThisClassMemory(p, alloc); + p->numRefs = newSize; + p->hash = AllocRefs(newSize, alloc); + + if (p->hash) + { + p->son = p->hash + p->hashSizeSum; + return 1; + } + } + } + + MatchFinder_Free(p, alloc); + return 0; +} + +static void MatchFinder_SetLimits(CMatchFinder *p) +{ + UInt32 limit = kMaxValForNormalize - p->pos; + UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; + + if (limit2 < limit) + limit = limit2; + limit2 = p->streamPos - p->pos; + + if (limit2 <= p->keepSizeAfter) + { + if (limit2 > 0) + limit2 = 1; + } + else + limit2 -= p->keepSizeAfter; + + if (limit2 < limit) + limit = limit2; + + { + UInt32 lenLimit = p->streamPos - p->pos; + if (lenLimit > p->matchMaxLen) + lenLimit = p->matchMaxLen; + p->lenLimit = lenLimit; + } + p->posLimit = p->pos + limit; +} + + +void MatchFinder_Init_LowHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash; + size_t numItems = p->fixedHashSize; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_HighHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash + p->fixedHashSize; + size_t numItems = (size_t)p->hashMask + 1; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_3(CMatchFinder *p, int readData) +{ + p->cyclicBufferPos = 0; + p->buffer = p->bufferBase; + p->pos = + p->streamPos = p->cyclicBufferSize; + p->result = SZ_OK; + p->streamEndWasReached = 0; + + if (readData) + MatchFinder_ReadBlock(p); + + MatchFinder_SetLimits(p); +} + + +void MatchFinder_Init(CMatchFinder *p) +{ + MatchFinder_Init_HighHash(p); + MatchFinder_Init_LowHash(p); + MatchFinder_Init_3(p, True); +} + + +static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) +{ + return (p->pos - p->historySize - 1) & kNormalizeMask; +} + +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) +{ + size_t i; + for (i = 0; i < numItems; i++) + { + UInt32 value = items[i]; + if (value <= subValue) + value = kEmptyHashValue; + else + value -= subValue; + items[i] = value; + } +} + +static void MatchFinder_Normalize(CMatchFinder *p) +{ + UInt32 subValue = MatchFinder_GetSubValue(p); + MatchFinder_Normalize3(subValue, p->hash, p->numRefs); + MatchFinder_ReduceOffsets(p, subValue); +} + + +MY_NO_INLINE +static void MatchFinder_CheckLimits(CMatchFinder *p) +{ + if (p->pos == kMaxValForNormalize) + MatchFinder_Normalize(p); + if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) + MatchFinder_CheckAndMoveAndRead(p); + if (p->cyclicBufferPos == p->cyclicBufferSize) + p->cyclicBufferPos = 0; + MatchFinder_SetLimits(p); +} + + +/* + (lenLimit > maxLen) +*/ +MY_FORCE_INLINE +static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *distances, unsigned maxLen) +{ + /* + son[_cyclicBufferPos] = curMatch; + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + return distances; + { + const Byte *pb = cur - delta; + curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + if (pb[maxLen] == cur[maxLen] && *pb == *cur) + { + UInt32 len = 0; + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + maxLen = len; + *distances++ = len; + *distances++ = delta - 1; + if (len == lenLimit) + return distances; + } + } + } + } + */ + + const Byte *lim = cur + lenLimit; + son[_cyclicBufferPos] = curMatch; + do + { + UInt32 delta = pos - curMatch; + if (delta >= _cyclicBufferSize) + break; + { + ptrdiff_t diff; + curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + diff = (ptrdiff_t)0 - delta; + if (cur[maxLen] == cur[maxLen + diff]) + { + const Byte *c = cur; + while (*c == c[diff]) + { + if (++c == lim) + { + distances[0] = (UInt32)(lim - cur); + distances[1] = delta - 1; + return distances + 2; + } + } + { + unsigned len = (unsigned)(c - cur); + if (maxLen < len) + { + maxLen = len; + distances[0] = (UInt32)len; + distances[1] = delta - 1; + distances += 2; + } + } + } + } + } + while (--cutValue); + + return distances; +} + + +MY_FORCE_INLINE +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *distances, UInt32 maxLen) +{ + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + unsigned len0 = 0, len1 = 0; + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + { + *ptr0 = *ptr1 = kEmptyHashValue; + return distances; + } + { + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte *pb = cur - delta; + unsigned len = (len0 < len1 ? len0 : len1); + UInt32 pair0 = pair[0]; + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + maxLen = (UInt32)len; + *distances++ = (UInt32)len; + *distances++ = delta - 1; + if (len == lenLimit) + { + *ptr1 = pair0; + *ptr0 = pair[1]; + return distances; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } + else + { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } +} + +static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) +{ + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + unsigned len0 = 0, len1 = 0; + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + { + *ptr0 = *ptr1 = kEmptyHashValue; + return; + } + { + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte *pb = cur - delta; + unsigned len = (len0 < len1 ? len0 : len1); + if (pb[len] == cur[len]) + { + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + { + if (len == lenLimit) + { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } + else + { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } +} + +#define MOVE_POS \ + ++p->cyclicBufferPos; \ + p->buffer++; \ + if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); + +#define MOVE_POS_RET MOVE_POS return (UInt32)offset; + +static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } + +#define GET_MATCHES_HEADER2(minLen, ret_op) \ + unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ + lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ + cur = p->buffer; + +#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) +#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) + +#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue + +#define GET_MATCHES_FOOTER(offset, maxLen) \ + offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \ + distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET; + +#define SKIP_FOOTER \ + SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; + +#define UPDATE_maxLen { \ + ptrdiff_t diff = (ptrdiff_t)0 - d2; \ + const Byte *c = cur + maxLen; \ + const Byte *lim = cur + lenLimit; \ + for (; c != lim; c++) if (*(c + diff) != *c) break; \ + maxLen = (unsigned)(c - cur); } + +static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + unsigned offset; + GET_MATCHES_HEADER(2) + HASH2_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + offset = 0; + GET_MATCHES_FOOTER(offset, 1) +} + +UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + unsigned offset; + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + offset = 0; + GET_MATCHES_FOOTER(offset, 2) +} + +static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 h2, d2, pos; + unsigned maxLen, offset; + UInt32 *hash; + GET_MATCHES_HEADER(3) + + HASH3_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash[h2]; + + curMatch = (hash + kFix3HashSize)[hv]; + + hash[h2] = pos; + (hash + kFix3HashSize)[hv] = pos; + + maxLen = 2; + offset = 0; + + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + UPDATE_maxLen + distances[0] = (UInt32)maxLen; + distances[1] = d2 - 1; + offset = 2; + if (maxLen == lenLimit) + { + SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); + MOVE_POS_RET; + } + } + + GET_MATCHES_FOOTER(offset, maxLen) +} + +static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen, offset; + UInt32 *hash; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + + curMatch = (hash + kFix4HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; + + maxLen = 0; + offset = 0; + + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + maxLen = 2; + distances[0] = 2; + distances[1] = d2 - 1; + offset = 2; + } + + if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + maxLen = 3; + distances[(size_t)offset + 1] = d3 - 1; + offset += 2; + d2 = d3; + } + + if (offset != 0) + { + UPDATE_maxLen + distances[(size_t)offset - 2] = (UInt32)maxLen; + if (maxLen == lenLimit) + { + SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); + MOVE_POS_RET; + } + } + + if (maxLen < 3) + maxLen = 3; + + GET_MATCHES_FOOTER(offset, maxLen) +} + +/* +static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos; + UInt32 *hash; + GET_MATCHES_HEADER(5) + + HASH5_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + d4 = pos - (hash + kFix4HashSize)[h4]; + + curMatch = (hash + kFix5HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; + + maxLen = 0; + offset = 0; + + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = maxLen = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + distances[0] = maxLen = 3; + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[2] = maxLen = 3; + distances[3] = d3 - 1; + offset = 4; + d2 = d3; + } + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[0] = maxLen = 3; + distances[1] = d3 - 1; + offset = 2; + d2 = d3; + } + + if (d2 != d4 && d4 < p->cyclicBufferSize + && *(cur - d4) == *cur + && *(cur - d4 + 3) == *(cur + 3)) + { + maxLen = 4; + distances[(size_t)offset + 1] = d4 - 1; + offset += 2; + d2 = d4; + } + + if (offset != 0) + { + UPDATE_maxLen + distances[(size_t)offset - 2] = maxLen; + if (maxLen == lenLimit) + { + SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + MOVE_POS_RET; + } + } + + if (maxLen < 4) + maxLen = 4; + + GET_MATCHES_FOOTER(offset, maxLen) +} +*/ + +static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen, offset; + UInt32 *hash; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + curMatch = (hash + kFix4HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; + + maxLen = 0; + offset = 0; + + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + maxLen = 2; + distances[0] = 2; + distances[1] = d2 - 1; + offset = 2; + } + + if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + maxLen = 3; + distances[(size_t)offset + 1] = d3 - 1; + offset += 2; + d2 = d3; + } + + if (offset != 0) + { + UPDATE_maxLen + distances[(size_t)offset - 2] = (UInt32)maxLen; + if (maxLen == lenLimit) + { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET; + } + } + + if (maxLen < 3) + maxLen = 3; + + offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), + distances + offset, maxLen) - (distances)); + MOVE_POS_RET +} + +/* +static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos + UInt32 *hash; + GET_MATCHES_HEADER(5) + + HASH5_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + d4 = pos - (hash + kFix4HashSize)[h4]; + + curMatch = (hash + kFix5HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; + + maxLen = 0; + offset = 0; + + if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + { + distances[0] = maxLen = 2; + distances[1] = d2 - 1; + offset = 2; + if (*(cur - d2 + 2) == cur[2]) + distances[0] = maxLen = 3; + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[2] = maxLen = 3; + distances[3] = d3 - 1; + offset = 4; + d2 = d3; + } + } + else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + { + distances[0] = maxLen = 3; + distances[1] = d3 - 1; + offset = 2; + d2 = d3; + } + + if (d2 != d4 && d4 < p->cyclicBufferSize + && *(cur - d4) == *cur + && *(cur - d4 + 3) == *(cur + 3)) + { + maxLen = 4; + distances[(size_t)offset + 1] = d4 - 1; + offset += 2; + d2 = d4; + } + + if (offset != 0) + { + UPDATE_maxLen + distances[(size_t)offset - 2] = maxLen; + if (maxLen == lenLimit) + { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET; + } + } + + if (maxLen < 4) + maxLen = 4; + + offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), + distances + offset, maxLen) - (distances)); + MOVE_POS_RET +} +*/ + +UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + unsigned offset; + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), + distances, 2) - (distances)); + MOVE_POS_RET +} + +static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + SKIP_HEADER(2) + HASH2_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + SKIP_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 h2; + UInt32 *hash; + SKIP_HEADER(3) + HASH3_CALC; + hash = p->hash; + curMatch = (hash + kFix3HashSize)[hv]; + hash[h2] = + (hash + kFix3HashSize)[hv] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 h2, h3; + UInt32 *hash; + SKIP_HEADER(4) + HASH4_CALC; + hash = p->hash; + curMatch = (hash + kFix4HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +/* +static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 h2, h3, h4; + UInt32 *hash; + SKIP_HEADER(5) + HASH5_CALC; + hash = p->hash; + curMatch = (hash + kFix5HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} +*/ + +static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 h2, h3; + UInt32 *hash; + SKIP_HEADER(4) + HASH4_CALC; + hash = p->hash; + curMatch = (hash + kFix4HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = p->pos; + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS + } + while (--num != 0); +} + +/* +static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 h2, h3, h4; + UInt32 *hash; + SKIP_HEADER(5) + HASH5_CALC; + hash = p->hash; + curMatch = hash + kFix5HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = p->pos; + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS + } + while (--num != 0); +} +*/ + +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + SKIP_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS + } + while (--num != 0); +} + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) +{ + vTable->Init = (Mf_Init_Func)MatchFinder_Init; + vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; + if (!p->btMode) + { + /* if (p->numHashBytes <= 4) */ + { + vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + } + /* + else + { + vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; + } + */ + } + else if (p->numHashBytes == 2) + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; + } + else if (p->numHashBytes == 3) + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; + } + else /* if (p->numHashBytes == 4) */ + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; + } + /* + else + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; + } + */ +} diff --git a/cores/saturn/deps/lzma/src/Lzma86Dec.c b/cores/saturn/deps/lzma/src/Lzma86Dec.c new file mode 100644 index 000000000..21031745c --- /dev/null +++ b/cores/saturn/deps/lzma/src/Lzma86Dec.c @@ -0,0 +1,54 @@ +/* Lzma86Dec.c -- LZMA + x86 (BCJ) Filter Decoder +2016-05-16 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "Lzma86.h" + +#include "Alloc.h" +#include "Bra.h" +#include "LzmaDec.h" + +SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize) +{ + unsigned i; + if (srcLen < LZMA86_HEADER_SIZE) + return SZ_ERROR_INPUT_EOF; + *unpackSize = 0; + for (i = 0; i < sizeof(UInt64); i++) + *unpackSize += ((UInt64)src[LZMA86_SIZE_OFFSET + i]) << (8 * i); + return SZ_OK; +} + +SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen) +{ + SRes res; + int useFilter; + SizeT inSizePure; + ELzmaStatus status; + + if (*srcLen < LZMA86_HEADER_SIZE) + return SZ_ERROR_INPUT_EOF; + + useFilter = src[0]; + + if (useFilter > 1) + { + *destLen = 0; + return SZ_ERROR_UNSUPPORTED; + } + + inSizePure = *srcLen - LZMA86_HEADER_SIZE; + res = LzmaDecode(dest, destLen, src + LZMA86_HEADER_SIZE, &inSizePure, + src + 1, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status, &g_Alloc); + *srcLen = inSizePure + LZMA86_HEADER_SIZE; + if (res != SZ_OK) + return res; + if (useFilter == 1) + { + UInt32 x86State; + x86_Convert_Init(x86State); + x86_Convert(dest, *destLen, 0, &x86State, 0); + } + return SZ_OK; +} diff --git a/cores/saturn/deps/lzma/src/Lzma86Enc.c b/cores/saturn/deps/lzma/src/Lzma86Enc.c new file mode 100644 index 000000000..2617bab8e --- /dev/null +++ b/cores/saturn/deps/lzma/src/Lzma86Enc.c @@ -0,0 +1,106 @@ +/* Lzma86Enc.c -- LZMA + x86 (BCJ) Filter Encoder +2018-07-04 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#include "Lzma86.h" + +#include "Alloc.h" +#include "Bra.h" +#include "LzmaEnc.h" + +#define SZE_OUT_OVERFLOW SZE_DATA_ERROR + +int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen, + int level, UInt32 dictSize, int filterMode) +{ + size_t outSize2 = *destLen; + Byte *filteredStream; + BoolInt useFilter; + int mainResult = SZ_ERROR_OUTPUT_EOF; + CLzmaEncProps props; + LzmaEncProps_Init(&props); + props.level = level; + props.dictSize = dictSize; + + *destLen = 0; + if (outSize2 < LZMA86_HEADER_SIZE) + return SZ_ERROR_OUTPUT_EOF; + + { + int i; + UInt64 t = srcLen; + for (i = 0; i < 8; i++, t >>= 8) + dest[LZMA86_SIZE_OFFSET + i] = (Byte)t; + } + + filteredStream = 0; + useFilter = (filterMode != SZ_FILTER_NO); + if (useFilter) + { + if (srcLen != 0) + { + filteredStream = (Byte *)MyAlloc(srcLen); + if (filteredStream == 0) + return SZ_ERROR_MEM; + memcpy(filteredStream, src, srcLen); + } + { + UInt32 x86State; + x86_Convert_Init(x86State); + x86_Convert(filteredStream, srcLen, 0, &x86State, 1); + } + } + + { + size_t minSize = 0; + BoolInt bestIsFiltered = False; + + /* passes for SZ_FILTER_AUTO: + 0 - BCJ + LZMA + 1 - LZMA + 2 - BCJ + LZMA agaian, if pass 0 (BCJ + LZMA) is better. + */ + int numPasses = (filterMode == SZ_FILTER_AUTO) ? 3 : 1; + + int i; + for (i = 0; i < numPasses; i++) + { + size_t outSizeProcessed = outSize2 - LZMA86_HEADER_SIZE; + size_t outPropsSize = 5; + SRes curRes; + BoolInt curModeIsFiltered = (numPasses > 1 && i == numPasses - 1); + if (curModeIsFiltered && !bestIsFiltered) + break; + if (useFilter && i == 0) + curModeIsFiltered = True; + + curRes = LzmaEncode(dest + LZMA86_HEADER_SIZE, &outSizeProcessed, + curModeIsFiltered ? filteredStream : src, srcLen, + &props, dest + 1, &outPropsSize, 0, + NULL, &g_Alloc, &g_Alloc); + + if (curRes != SZ_ERROR_OUTPUT_EOF) + { + if (curRes != SZ_OK) + { + mainResult = curRes; + break; + } + if (outSizeProcessed <= minSize || mainResult != SZ_OK) + { + minSize = outSizeProcessed; + bestIsFiltered = curModeIsFiltered; + mainResult = SZ_OK; + } + } + } + dest[0] = (Byte)(bestIsFiltered ? 1 : 0); + *destLen = LZMA86_HEADER_SIZE + minSize; + } + if (useFilter) + MyFree(filteredStream); + return mainResult; +} diff --git a/cores/saturn/deps/lzma/src/LzmaDec.c b/cores/saturn/deps/lzma/src/LzmaDec.c new file mode 100644 index 000000000..ba3e1dd50 --- /dev/null +++ b/cores/saturn/deps/lzma/src/LzmaDec.c @@ -0,0 +1,1185 @@ +/* LzmaDec.c -- LZMA Decoder +2018-07-04 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #include "CpuArch.h" */ +#include "LzmaDec.h" + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + +#define RC_INIT_SIZE 5 + +#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); +#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); +#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ + { UPDATE_0(p); i = (i + i); A0; } else \ + { UPDATE_1(p); i = (i + i) + 1; A1; } + +#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } + +#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ + { UPDATE_0(p + i); A0; } else \ + { UPDATE_1(p + i); A1; } +#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) +#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) +#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) + +#define TREE_DECODE(probs, limit, i) \ + { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } + +/* #define _LZMA_SIZE_OPT */ + +#ifdef _LZMA_SIZE_OPT +#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) +#else +#define TREE_6_DECODE(probs, i) \ + { i = 1; \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + i -= 0x40; } +#endif + +#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol) +#define MATCHED_LITER_DEC \ + matchByte += matchByte; \ + bit = offs; \ + offs &= matchByte; \ + probLit = prob + (offs + bit + symbol); \ + GET_BIT2(probLit, symbol, offs ^= bit; , ;) + + + +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define UPDATE_0_CHECK range = bound; +#define UPDATE_1_CHECK range -= bound; code -= bound; +#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ + { UPDATE_0_CHECK; i = (i + i); A0; } else \ + { UPDATE_1_CHECK; i = (i + i) + 1; A1; } +#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) +#define TREE_DECODE_CHECK(probs, limit, i) \ + { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } + + +#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ + { UPDATE_0_CHECK; i += m; m += m; } else \ + { UPDATE_1_CHECK; m += m; i += m; } + + +#define kNumPosBitsMax 4 +#define kNumPosStatesMax (1 << kNumPosBitsMax) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define LenLow 0 +#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) +#define kNumLenProbs (LenHigh + kLenNumHighSymbols) + +#define LenChoice LenLow +#define LenChoice2 (LenLow + (1 << kLenNumLowBits)) + +#define kNumStates 12 +#define kNumStates2 16 +#define kNumLitStates 7 + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define kNumPosSlotBits 6 +#define kNumLenToPosStates 4 + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) + +#define kMatchMinLen 2 +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) + +/* External ASM code needs same CLzmaProb array layout. So don't change it. */ + +/* (probs_1664) is faster and better for code size at some platforms */ +/* +#ifdef MY_CPU_X86_OR_AMD64 +*/ +#define kStartOffset 1664 +#define GET_PROBS p->probs_1664 +/* +#define GET_PROBS p->probs + kStartOffset +#else +#define kStartOffset 0 +#define GET_PROBS p->probs +#endif +*/ + +#define SpecPos (-kStartOffset) +#define IsRep0Long (SpecPos + kNumFullDistances) +#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax)) +#define LenCoder (RepLenCoder + kNumLenProbs) +#define IsMatch (LenCoder + kNumLenProbs) +#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) +#define IsRep (Align + kAlignTableSize) +#define IsRepG0 (IsRep + kNumStates) +#define IsRepG1 (IsRepG0 + kNumStates) +#define IsRepG2 (IsRepG1 + kNumStates) +#define PosSlot (IsRepG2 + kNumStates) +#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define NUM_BASE_PROBS (Literal + kStartOffset) + +#if Align != 0 && kStartOffset != 0 + #error Stop_Compiling_Bad_LZMA_kAlign +#endif + +#if NUM_BASE_PROBS != 1984 + #error Stop_Compiling_Bad_LZMA_PROBS +#endif + + +#define LZMA_LIT_SIZE 0x300 + +#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + + +#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4) +#define COMBINED_PS_STATE (posState + state) +#define GET_LEN_STATE (posState) + +#define LZMA_DIC_MIN (1 << 12) + +/* +p->remainLen : shows status of LZMA decoder: + < kMatchSpecLenStart : normal remain + = kMatchSpecLenStart : finished + = kMatchSpecLenStart + 1 : need init range coder + = kMatchSpecLenStart + 2 : need init range coder and state +*/ + +/* ---------- LZMA_DECODE_REAL ---------- */ +/* +LzmaDec_DecodeReal_3() can be implemented in external ASM file. +3 - is the code compatibility version of that function for check at link time. +*/ + +#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3 + +/* +LZMA_DECODE_REAL() +In: + RangeCoder is normalized + if (p->dicPos == limit) + { + LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. + So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol + is not END_OF_PAYALOAD_MARKER, then function returns error code. + } + +Processing: + first LZMA symbol will be decoded in any case + All checks for limits are at the end of main loop, + It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit), + RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. + +Out: + RangeCoder is normalized + Result: + SZ_OK - OK + SZ_ERROR_DATA - Error + p->remainLen: + < kMatchSpecLenStart : normal remain + = kMatchSpecLenStart : finished +*/ + + +#ifdef _LZMA_DEC_OPT + +int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); + +#else + +static +int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; + UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; + unsigned lc = p->prop.lc; + unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); + + Byte *dic = p->dic; + SizeT dicBufSize = p->dicBufSize; + SizeT dicPos = p->dicPos; + + UInt32 processedPos = p->processedPos; + UInt32 checkDicSize = p->checkDicSize; + unsigned len = 0; + + const Byte *buf = p->buf; + UInt32 range = p->range; + UInt32 code = p->code; + + do + { + CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(processedPos, pbMask); + + prob = probs + IsMatch + COMBINED_PS_STATE; + IF_BIT_0(prob) + { + unsigned symbol; + UPDATE_0(prob); + prob = probs + Literal; + if (processedPos != 0 || checkDicSize != 0) + prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); + processedPos++; + + if (state < kNumLitStates) + { + state -= (state < 4) ? state : 3; + symbol = 1; + #ifdef _LZMA_SIZE_OPT + do { NORMAL_LITER_DEC } while (symbol < 0x100); + #else + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + #endif + } + else + { + unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + unsigned offs = 0x100; + state -= (state < 10) ? 3 : 6; + symbol = 1; + #ifdef _LZMA_SIZE_OPT + do + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + } + while (symbol < 0x100); + #else + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + } + #endif + } + + dic[dicPos++] = (Byte)symbol; + continue; + } + + { + UPDATE_1(prob); + prob = probs + IsRep + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + state += kNumStates; + prob = probs + LenCoder; + } + else + { + UPDATE_1(prob); + /* + // that case was checked before with kBadRepCode + if (checkDicSize == 0 && processedPos == 0) + return SZ_ERROR_DATA; + */ + prob = probs + IsRepG0 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + prob = probs + IsRep0Long + COMBINED_PS_STATE; + IF_BIT_0(prob) + { + UPDATE_0(prob); + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + processedPos++; + state = state < kNumLitStates ? 9 : 11; + continue; + } + UPDATE_1(prob); + } + else + { + UInt32 distance; + UPDATE_1(prob); + prob = probs + IsRepG1 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep1; + } + else + { + UPDATE_1(prob); + prob = probs + IsRepG2 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep2; + } + else + { + UPDATE_1(prob); + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < kNumLitStates ? 8 : 11; + prob = probs + RepLenCoder; + } + + #ifdef _LZMA_SIZE_OPT + { + unsigned lim, offset; + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + GET_LEN_STATE; + offset = 0; + lim = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + offset = kLenNumLowSymbols; + lim = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; + offset = kLenNumLowSymbols * 2; + lim = (1 << kLenNumHighBits); + } + } + TREE_DECODE(probLen, lim, len); + len += offset; + } + #else + { + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + GET_LEN_STATE; + len = 1; + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + len -= 8; + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + len = 1; + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; + TREE_DECODE(probLen, (1 << kLenNumHighBits), len); + len += kLenNumLowSymbols * 2; + } + } + } + #endif + + if (state >= kNumStates) + { + UInt32 distance; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); + TREE_6_DECODE(prob, distance); + if (distance >= kStartPosModelIndex) + { + unsigned posSlot = (unsigned)distance; + unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); + distance = (2 | (distance & 1)); + if (posSlot < kEndPosModelIndex) + { + distance <<= numDirectBits; + prob = probs + SpecPos; + { + UInt32 m = 1; + distance++; + do + { + REV_BIT_VAR(prob, distance, m); + } + while (--numDirectBits); + distance -= m; + } + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE + range >>= 1; + + { + UInt32 t; + code -= range; + t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ + distance = (distance << 1) + (t + 1); + code += range & t; + } + /* + distance <<= 1; + if (code >= range) + { + code -= range; + distance |= 1; + } + */ + } + while (--numDirectBits); + prob = probs + Align; + distance <<= kNumAlignBits; + { + unsigned i = 1; + REV_BIT_CONST(prob, i, 1); + REV_BIT_CONST(prob, i, 2); + REV_BIT_CONST(prob, i, 4); + REV_BIT_LAST (prob, i, 8); + distance |= i; + } + if (distance == (UInt32)0xFFFFFFFF) + { + len = kMatchSpecLenStart; + state -= kNumStates; + break; + } + } + } + + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance + 1; + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize)) + { + p->dicPos = dicPos; + return SZ_ERROR_DATA; + } + } + + len += kMatchMinLen; + + { + SizeT rem; + unsigned curLen; + SizeT pos; + + if ((rem = limit - dicPos) == 0) + { + p->dicPos = dicPos; + return SZ_ERROR_DATA; + } + + curLen = ((rem < len) ? (unsigned)rem : len); + pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); + + processedPos += (UInt32)curLen; + + len -= curLen; + if (curLen <= dicBufSize - pos) + { + Byte *dest = dic + dicPos; + ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; + const Byte *lim = dest + curLen; + dicPos += (SizeT)curLen; + do + *(dest) = (Byte)*(dest + src); + while (++dest != lim); + } + else + { + do + { + dic[dicPos++] = dic[pos]; + if (++pos == dicBufSize) + pos = 0; + } + while (--curLen != 0); + } + } + } + } + while (dicPos < limit && buf < bufLimit); + + NORMALIZE; + + p->buf = buf; + p->range = range; + p->code = code; + p->remainLen = (UInt32)len; + p->dicPos = dicPos; + p->processedPos = processedPos; + p->reps[0] = rep0; + p->reps[1] = rep1; + p->reps[2] = rep2; + p->reps[3] = rep3; + p->state = (UInt32)state; + + return SZ_OK; +} +#endif + +static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) +{ + if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) + { + Byte *dic = p->dic; + SizeT dicPos = p->dicPos; + SizeT dicBufSize = p->dicBufSize; + unsigned len = (unsigned)p->remainLen; + SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ + SizeT rem = limit - dicPos; + if (rem < len) + len = (unsigned)(rem); + + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) + p->checkDicSize = p->prop.dicSize; + + p->processedPos += (UInt32)len; + p->remainLen -= (UInt32)len; + while (len != 0) + { + len--; + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + } + p->dicPos = dicPos; + } +} + + +#define kRange0 0xFFFFFFFF +#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) +#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) +#if kBadRepCode != (0xC0000000 - 0x400) + #error Stop_Compiling_Bad_LZMA_Check +#endif + +static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + do + { + SizeT limit2 = limit; + if (p->checkDicSize == 0) + { + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit2 = p->dicPos + rem; + + if (p->processedPos == 0) + if (p->code >= kBadRepCode) + return SZ_ERROR_DATA; + } + + RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit)); + + if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) + p->checkDicSize = p->prop.dicSize; + + LzmaDec_WriteRem(p, limit); + } + while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); + + return 0; +} + +typedef enum +{ + DUMMY_ERROR, /* unexpected end of input stream */ + DUMMY_LIT, + DUMMY_MATCH, + DUMMY_REP +} ELzmaDummy; + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) +{ + UInt32 range = p->range; + UInt32 code = p->code; + const Byte *bufLimit = buf + inSize; + const CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; + ELzmaDummy res; + + { + const CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1); + + prob = probs + IsMatch + COMBINED_PS_STATE; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + + /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ + + prob = probs + Literal; + if (p->checkDicSize != 0 || p->processedPos != 0) + prob += ((UInt32)LZMA_LIT_SIZE * + ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + + (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + + if (state < kNumLitStates) + { + unsigned symbol = 1; + do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + (p->dicPos < p->reps[0] ? p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; + do + { + unsigned bit; + const CLzmaProb *probLit; + matchByte += matchByte; + bit = offs; + offs &= matchByte; + probLit = prob + (offs + bit + symbol); + GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; ) + } + while (symbol < 0x100); + } + res = DUMMY_LIT; + } + else + { + unsigned len; + UPDATE_1_CHECK; + + prob = probs + IsRep + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + state = 0; + prob = probs + LenCoder; + res = DUMMY_MATCH; + } + else + { + UPDATE_1_CHECK; + res = DUMMY_REP; + prob = probs + IsRepG0 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + prob = probs + IsRep0Long + COMBINED_PS_STATE; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + NORMALIZE_CHECK; + return DUMMY_REP; + } + else + { + UPDATE_1_CHECK; + } + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG1 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG2 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + } + } + } + state = kNumStates; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + const CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenLow + GET_LEN_STATE; + offset = 0; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenChoice2; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + offset = kLenNumLowSymbols; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenHigh; + offset = kLenNumLowSymbols * 2; + limit = 1 << kLenNumHighBits; + } + } + TREE_DECODE_CHECK(probLen, limit, len); + len += offset; + } + + if (state < 4) + { + unsigned posSlot; + prob = probs + PosSlot + + ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << + kNumPosSlotBits); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); + if (posSlot >= kStartPosModelIndex) + { + unsigned numDirectBits = ((posSlot >> 1) - 1); + + /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ + + if (posSlot < kEndPosModelIndex) + { + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE_CHECK + range >>= 1; + code -= range & (((code - range) >> 31) - 1); + /* if (code >= range) code -= range; */ + } + while (--numDirectBits); + prob = probs + Align; + numDirectBits = kNumAlignBits; + } + { + unsigned i = 1; + unsigned m = 1; + do + { + REV_BIT_CHECK(prob, i, m); + } + while (--numDirectBits); + } + } + } + } + } + NORMALIZE_CHECK; + return res; +} + + +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) +{ + p->remainLen = kMatchSpecLenStart + 1; + p->tempBufSize = 0; + + if (initDic) + { + p->processedPos = 0; + p->checkDicSize = 0; + p->remainLen = kMatchSpecLenStart + 2; + } + if (initState) + p->remainLen = kMatchSpecLenStart + 2; +} + +void LzmaDec_Init(CLzmaDec *p) +{ + p->dicPos = 0; + LzmaDec_InitDicAndState(p, True, True); +} + + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT inSize = *srcLen; + (*srcLen) = 0; + + *status = LZMA_STATUS_NOT_SPECIFIED; + + if (p->remainLen > kMatchSpecLenStart) + { + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize != 0 && p->tempBuf[0] != 0) + return SZ_ERROR_DATA; + if (p->tempBufSize < RC_INIT_SIZE) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + p->code = + ((UInt32)p->tempBuf[1] << 24) + | ((UInt32)p->tempBuf[2] << 16) + | ((UInt32)p->tempBuf[3] << 8) + | ((UInt32)p->tempBuf[4]); + p->range = 0xFFFFFFFF; + p->tempBufSize = 0; + + if (p->remainLen > kMatchSpecLenStart + 1) + { + SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); + SizeT i; + CLzmaProb *probs = p->probs; + for (i = 0; i < numProbs; i++) + probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + } + + p->remainLen = 0; + } + + LzmaDec_WriteRem(p, dicLimit); + + while (p->remainLen != kMatchSpecLenStart) + { + int checkEndMarkNow = 0; + + if (p->dicPos >= dicLimit) + { + if (p->remainLen == 0 && p->code == 0) + { + *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; + return SZ_OK; + } + if (finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + if (p->remainLen != 0) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + checkEndMarkNow = 1; + } + + if (p->tempBufSize == 0) + { + SizeT processed; + const Byte *bufLimit; + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + int dummyRes = LzmaDec_TryDummy(p, src, inSize); + if (dummyRes == DUMMY_ERROR) + { + memcpy(p->tempBuf, src, inSize); + p->tempBufSize = (unsigned)inSize; + (*srcLen) += inSize; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + bufLimit = src; + } + else + bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + p->buf = src; + if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) + return SZ_ERROR_DATA; + processed = (SizeT)(p->buf - src); + (*srcLen) += processed; + src += processed; + inSize -= processed; + } + else + { + unsigned rem = p->tempBufSize, lookAhead = 0; + while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) + p->tempBuf[rem++] = src[lookAhead++]; + p->tempBufSize = rem; + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem); + if (dummyRes == DUMMY_ERROR) + { + (*srcLen) += (SizeT)lookAhead; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + } + p->buf = p->tempBuf; + if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) + return SZ_ERROR_DATA; + + { + unsigned kkk = (unsigned)(p->buf - p->tempBuf); + if (rem < kkk) + return SZ_ERROR_FAIL; /* some internal error */ + rem -= kkk; + if (lookAhead < rem) + return SZ_ERROR_FAIL; /* some internal error */ + lookAhead -= rem; + } + (*srcLen) += (SizeT)lookAhead; + src += lookAhead; + inSize -= (SizeT)lookAhead; + p->tempBufSize = 0; + } + } + + if (p->code != 0) + return SZ_ERROR_DATA; + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; +} + + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen; + SizeT inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) + { + SizeT inSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + if (p->dicPos == p->dicBufSize) + p->dicPos = 0; + dicPos = p->dicPos; + if (outSize > p->dicBufSize - dicPos) + { + outSizeCur = p->dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } + else + { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); + src += inSizeCur; + inSize -= inSizeCur; + *srcLen += inSizeCur; + outSizeCur = p->dicPos - dicPos; + memcpy(dest, p->dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if (res != 0) + return res; + if (outSizeCur == 0 || outSize == 0) + return SZ_OK; + } +} + +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->probs); + p->probs = NULL; +} + +static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->dic); + p->dic = NULL; +} + +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc) +{ + LzmaDec_FreeProbs(p, alloc); + LzmaDec_FreeDict(p, alloc); +} + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) +{ + UInt32 dicSize; + Byte d; + + if (size < LZMA_PROPS_SIZE) + return SZ_ERROR_UNSUPPORTED; + else + dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); + + if (dicSize < LZMA_DIC_MIN) + dicSize = LZMA_DIC_MIN; + p->dicSize = dicSize; + + d = data[0]; + if (d >= (9 * 5 * 5)) + return SZ_ERROR_UNSUPPORTED; + + p->lc = (Byte)(d % 9); + d /= 9; + p->pb = (Byte)(d / 5); + p->lp = (Byte)(d % 5); + + return SZ_OK; +} + +static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc) +{ + UInt32 numProbs = LzmaProps_GetNumProbs(propNew); + if (!p->probs || numProbs != p->numProbs) + { + LzmaDec_FreeProbs(p, alloc); + p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); + if (!p->probs) + return SZ_ERROR_MEM; + p->probs_1664 = p->probs + 1664; + p->numProbs = numProbs; + } + return SZ_OK; +} + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) +{ + CLzmaProps propNew; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) +{ + CLzmaProps propNew; + SizeT dicBufSize; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + + { + UInt32 dictSize = propNew.dicSize; + SizeT mask = ((UInt32)1 << 12) - 1; + if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; + else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; + dicBufSize = ((SizeT)dictSize + mask) & ~mask; + if (dicBufSize < dictSize) + dicBufSize = dictSize; + } + + if (!p->dic || dicBufSize != p->dicBufSize) + { + LzmaDec_FreeDict(p, alloc); + p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize); + if (!p->dic) + { + LzmaDec_FreeProbs(p, alloc); + return SZ_ERROR_MEM; + } + } + p->dicBufSize = dicBufSize; + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc) +{ + CLzmaDec p; + SRes res; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + if (inSize < RC_INIT_SIZE) + return SZ_ERROR_INPUT_EOF; + LzmaDec_Construct(&p); + RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); + p.dic = dest; + p.dicBufSize = outSize; + LzmaDec_Init(&p); + *srcLen = inSize; + res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + *destLen = p.dicPos; + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + LzmaDec_FreeProbs(&p, alloc); + return res; +} diff --git a/cores/saturn/deps/lzma/src/LzmaEnc.c b/cores/saturn/deps/lzma/src/LzmaEnc.c new file mode 100644 index 000000000..2d3839df2 --- /dev/null +++ b/cores/saturn/deps/lzma/src/LzmaEnc.c @@ -0,0 +1,1330 @@ +/* LzmaEnc.c -- LZMA Encoder +2019-01-10: Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #define SHOW_STAT */ +/* #define SHOW_STAT2 */ + +#if defined(SHOW_STAT) || defined(SHOW_STAT2) +#include +#endif + +#include "LzmaEnc.h" + +#include "LzFind.h" +#ifndef _7ZIP_ST +#include "LzFindMt.h" +#endif + +#ifdef SHOW_STAT +static unsigned g_STAT_OFFSET = 0; +#endif + +#define kLzmaMaxHistorySize ((UInt32)3 << 29) +/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */ + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 +#define kProbInitValue (kBitModelTotal >> 1) + +#define kNumMoveReducingBits 4 +#define kNumBitPriceShiftBits 4 +#define kBitPrice (1 << kNumBitPriceShiftBits) + +#define REP_LEN_COUNT 64 + +void LzmaEncProps_Init(CLzmaEncProps *p) +{ + p->level = 5; + p->dictSize = p->mc = 0; + p->reduceSize = (UInt64)(Int64)-1; + p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; + p->writeEndMark = 0; +} + +void LzmaEncProps_Normalize(CLzmaEncProps *p) +{ + int level = p->level; + if (level < 0) level = 5; + p->level = level; + + if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26))); + if (p->dictSize > p->reduceSize) + { + unsigned i; + UInt32 reduceSize = (UInt32)p->reduceSize; + for (i = 11; i <= 30; i++) + { + if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; } + if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; } + } + } + + if (p->lc < 0) p->lc = 3; + if (p->lp < 0) p->lp = 0; + if (p->pb < 0) p->pb = 2; + + if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); + if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); + if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); + if (p->numHashBytes < 0) p->numHashBytes = 4; + if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); + + if (p->numThreads < 0) + p->numThreads = + #ifndef _7ZIP_ST + ((p->btMode && p->algo) ? 2 : 1); + #else + 1; + #endif +} + +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) +{ + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + return props.dictSize; +} + +#if (_MSC_VER >= 1400) +/* BSR code is fast for some new CPUs */ +/* #define LZMA_LOG_BSR */ +#endif + +#ifdef LZMA_LOG_BSR + +#define kDicLogSizeMaxCompress 32 + +#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); } + +static unsigned GetPosSlot1(UInt32 pos) +{ + unsigned res; + BSR2_RET(pos, res); + return res; +} +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } + +#else + +#define kNumLogBits (9 + sizeof(size_t) / 2) +/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */ + +#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) + +static void LzmaEnc_FastPosInit(Byte *g_FastPos) +{ + unsigned slot; + g_FastPos[0] = 0; + g_FastPos[1] = 1; + g_FastPos += 2; + + for (slot = 2; slot < kNumLogBits * 2; slot++) + { + size_t k = ((size_t)1 << ((slot >> 1) - 1)); + size_t j; + for (j = 0; j < k; j++) + g_FastPos[j] = (Byte)slot; + g_FastPos += k; + } +} + +/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */ +/* +#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ + (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } +*/ + +/* +#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ + (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } +*/ + +#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } + +/* +#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \ + p->g_FastPos[pos >> 6] + 12 : \ + p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } +*/ + +#define GetPosSlot1(pos) p->g_FastPos[pos] +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); } + +#endif + + +#define LZMA_NUM_REPS 4 + +typedef UInt16 CState; +typedef UInt16 CExtra; + +typedef struct +{ + UInt32 price; + CState state; + CExtra extra; + // 0 : normal + // 1 : LIT : MATCH + // > 1 : MATCH (extra-1) : LIT : REP0 (len) + UInt32 len; + UInt32 dist; + UInt32 reps[LZMA_NUM_REPS]; +} COptimal; + + +// 18.06 +#define kNumOpts (1 << 11) +#define kPackReserve (kNumOpts * 8) +// #define kNumOpts (1 << 12) +// #define kPackReserve (1 + kNumOpts * 2) + +#define kNumLenToPosStates 4 +#define kNumPosSlotBits 6 +#define kDicLogSizeMin 0 +#define kDicLogSizeMax 32 +#define kDistTableSizeMax (kDicLogSizeMax * 2) + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) +#define kAlignMask (kAlignTableSize - 1) + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +typedef +#ifdef _LZMA_PROB32 + UInt32 +#else + UInt16 +#endif + CLzmaProb; + +#define LZMA_PB_MAX 4 +#define LZMA_LC_MAX 8 +#define LZMA_LP_MAX 4 + +#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) +#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols) + +#define LZMA_MATCH_LEN_MIN 2 +#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) + +#define kNumStates 12 + + +typedef struct +{ + CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)]; + CLzmaProb high[kLenNumHighSymbols]; +} CLenEnc; + + +typedef struct +{ + unsigned tableSize; + UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; + // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2]; + // UInt32 prices2[kLenNumSymbolsTotal]; +} CLenPriceEnc; + +#define GET_PRICE_LEN(p, posState, len) \ + ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN]) + +/* +#define GET_PRICE_LEN(p, posState, len) \ + ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9))) +*/ + +typedef struct +{ + UInt32 range; + unsigned cache; + UInt64 low; + UInt64 cacheSize; + Byte *buf; + Byte *bufLim; + Byte *bufBase; + ISeqOutStream *outStream; + UInt64 processed; + SRes res; +} CRangeEnc; + + +typedef struct +{ + CLzmaProb *litProbs; + + unsigned state; + UInt32 reps[LZMA_NUM_REPS]; + + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb posEncoders[kNumFullDistances]; + + CLenEnc lenProbs; + CLenEnc repLenProbs; + +} CSaveState; + + +typedef UInt32 CProbPrice; + + +typedef struct +{ + void *matchFinderObj; + IMatchFinder matchFinder; + + unsigned optCur; + unsigned optEnd; + + unsigned longestMatchLen; + unsigned numPairs; + UInt32 numAvail; + + unsigned state; + unsigned numFastBytes; + unsigned additionalOffset; + UInt32 reps[LZMA_NUM_REPS]; + unsigned lpMask, pbMask; + CLzmaProb *litProbs; + CRangeEnc rc; + + UInt32 backRes; + + unsigned lc, lp, pb; + unsigned lclp; + + BoolInt fastMode; + BoolInt writeEndMark; + BoolInt finished; + BoolInt multiThread; + BoolInt needInit; + // BoolInt _maxMode; + + UInt64 nowPos64; + + unsigned matchPriceCount; + // unsigned alignPriceCount; + int repLenEncCounter; + + unsigned distTableSize; + + UInt32 dictSize; + SRes result; + + #ifndef _7ZIP_ST + BoolInt mtMode; + // begin of CMatchFinderMt is used in LZ thread + CMatchFinderMt matchFinderMt; + // end of CMatchFinderMt is used in BT and HASH threads + #endif + + CMatchFinder matchFinderBase; + + #ifndef _7ZIP_ST + Byte pad[128]; + #endif + + // LZ thread + CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; + + UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; + + UInt32 alignPrices[kAlignTableSize]; + UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; + UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; + + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb posEncoders[kNumFullDistances]; + + CLenEnc lenProbs; + CLenEnc repLenProbs; + + #ifndef LZMA_LOG_BSR + Byte g_FastPos[1 << kNumLogBits]; + #endif + + CLenPriceEnc lenEnc; + CLenPriceEnc repLenEnc; + + COptimal opt[kNumOpts]; + + CSaveState saveState; + + #ifndef _7ZIP_ST + Byte pad2[128]; + #endif +} CLzmaEnc; + + +SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + + if (props.lc > LZMA_LC_MAX + || props.lp > LZMA_LP_MAX + || props.pb > LZMA_PB_MAX + || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress) + || props.dictSize > kLzmaMaxHistorySize) + return SZ_ERROR_PARAM; + + p->dictSize = props.dictSize; + { + unsigned fb = props.fb; + if (fb < 5) + fb = 5; + if (fb > LZMA_MATCH_LEN_MAX) + fb = LZMA_MATCH_LEN_MAX; + p->numFastBytes = fb; + } + p->lc = props.lc; + p->lp = props.lp; + p->pb = props.pb; + p->fastMode = (props.algo == 0); + // p->_maxMode = True; + p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0); + { + unsigned numHashBytes = 4; + if (props.btMode) + { + if (props.numHashBytes < 2) + numHashBytes = 2; + else if (props.numHashBytes < 4) + numHashBytes = props.numHashBytes; + } + p->matchFinderBase.numHashBytes = numHashBytes; + } + + p->matchFinderBase.cutValue = props.mc; + + p->writeEndMark = props.writeEndMark; + + #ifndef _7ZIP_ST + /* + if (newMultiThread != _multiThread) + { + ReleaseMatchFinder(); + _multiThread = newMultiThread; + } + */ + p->multiThread = (props.numThreads > 1); + #endif + + return SZ_OK; +} + + +void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + p->matchFinderBase.expectedDataSize = expectedDataSiize; +} + + +#define kState_Start 0 +#define kState_LitAfterMatch 4 +#define kState_LitAfterRep 5 +#define kState_MatchAfterLit 7 +#define kState_RepAfterLit 8 + +static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; +static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; +static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; +static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; + +#define IsLitState(s) ((s) < 7) +#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1) +#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1) + +#define kInfinityPrice (1 << 30) + +static void RangeEnc_Construct(CRangeEnc *p) +{ + p->outStream = NULL; + p->bufBase = NULL; +} + +#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) + +#define RC_BUF_SIZE (1 << 16) + +static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc) +{ + if (!p->bufBase) + { + p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE); + if (!p->bufBase) + return 0; + p->bufLim = p->bufBase + RC_BUF_SIZE; + } + return 1; +} + +static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->bufBase); + p->bufBase = 0; +} + +static void RangeEnc_Init(CRangeEnc *p) +{ + /* Stream.Init(); */ + p->range = 0xFFFFFFFF; + p->cache = 0; + p->low = 0; + p->cacheSize = 0; + + p->buf = p->bufBase; + + p->processed = 0; + p->res = SZ_OK; +} + +MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) +{ + size_t num; + if (p->res != SZ_OK) + return; + num = p->buf - p->bufBase; + if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) + p->res = SZ_ERROR_WRITE; + p->processed += num; + p->buf = p->bufBase; +} + +MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) +{ + UInt32 low = (UInt32)p->low; + unsigned high = (unsigned)(p->low >> 32); + p->low = (UInt32)(low << 8); + if (low < (UInt32)0xFF000000 || high != 0) + { + { + Byte *buf = p->buf; + *buf++ = (Byte)(p->cache + high); + p->cache = (unsigned)(low >> 24); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + if (p->cacheSize == 0) + return; + } + high += 0xFF; + for (;;) + { + Byte *buf = p->buf; + *buf++ = (Byte)(high); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + if (--p->cacheSize == 0) + return; + } + } + p->cacheSize++; +} + +static void RangeEnc_FlushData(CRangeEnc *p) +{ + int i; + for (i = 0; i < 5; i++) + RangeEnc_ShiftLow(p); +} + +#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); } + +#define RC_BIT_PRE(p, prob) \ + ttt = *(prob); \ + newBound = (range >> kNumBitModelTotalBits) * ttt; + +// #define _LZMA_ENC_USE_BRANCH + +#ifdef _LZMA_ENC_USE_BRANCH + +#define RC_BIT(p, prob, bit) { \ + RC_BIT_PRE(p, prob) \ + if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \ + else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \ + *(prob) = (CLzmaProb)ttt; \ + RC_NORM(p) \ + } + +#else + +#define RC_BIT(p, prob, bit) { \ + UInt32 mask; \ + RC_BIT_PRE(p, prob) \ + mask = 0 - (UInt32)bit; \ + range &= mask; \ + mask &= newBound; \ + range -= mask; \ + (p)->low += mask; \ + mask = (UInt32)bit - 1; \ + range += newBound & mask; \ + mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \ + mask += ((1 << kNumMoveBits) - 1); \ + ttt += (Int32)(mask - ttt) >> kNumMoveBits; \ + *(prob) = (CLzmaProb)ttt; \ + RC_NORM(p) \ + } + +#endif + + + + +#define RC_BIT_0_BASE(p, prob) \ + range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); + +#define RC_BIT_1_BASE(p, prob) \ + range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \ + +#define RC_BIT_0(p, prob) \ + RC_BIT_0_BASE(p, prob) \ + RC_NORM(p) + +#define RC_BIT_1(p, prob) \ + RC_BIT_1_BASE(p, prob) \ + RC_NORM(p) + +static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob) +{ + UInt32 range, ttt, newBound; + range = p->range; + RC_BIT_PRE(p, prob) + RC_BIT_0(p, prob) + p->range = range; +} + +static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym) +{ + UInt32 range = p->range; + sym |= 0x100; + do + { + UInt32 ttt, newBound; + // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1); + CLzmaProb *prob = probs + (sym >> 8); + UInt32 bit = (sym >> 7) & 1; + sym <<= 1; + RC_BIT(p, prob, bit); + } + while (sym < 0x10000); + p->range = range; +} + +static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) +{ + UInt32 i; + for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++) + { + const unsigned kCyclesBits = kNumBitPriceShiftBits; + UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1)); + unsigned bitCount = 0; + unsigned j; + for (j = 0; j < kCyclesBits; j++) + { + w = w * w; + bitCount <<= 1; + while (w >= ((UInt32)1 << 16)) + { + w >>= 1; + bitCount++; + } + } + ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + // printf("\n%3d: %5d", i, ProbPrices[i]); + } +} + + +#define GET_PRICE(prob, bit) \ + p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + +#define GET_PRICEa(prob, bit) \ + ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + +#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + +#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + + +static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices) +{ + UInt32 price = 0; + sym |= 0x100; + do + { + unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[sym], bit); + } + while (sym >= 2); + return price; +} + + +static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices) +{ + UInt32 price = 0; + UInt32 offs = 0x100; + sym |= 0x100; + do + { + matchByte <<= 1; + price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1); + sym <<= 1; + offs &= ~(matchByte ^ sym); + } + while (sym < 0x10000); + return price; +} + + + +static void LenEnc_Init(CLenEnc *p) +{ + unsigned i; + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++) + p->low[i] = kProbInitValue; + for (i = 0; i < kLenNumHighSymbols; i++) + p->high[i] = kProbInitValue; +} + +static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState) +{ + UInt32 range, ttt, newBound; + CLzmaProb *probs = p->low; + range = rc->range; + RC_BIT_PRE(rc, probs); + if (sym >= kLenNumLowSymbols) + { + RC_BIT_1(rc, probs); + probs += kLenNumLowSymbols; + RC_BIT_PRE(rc, probs); + if (sym >= kLenNumLowSymbols * 2) + { + RC_BIT_1(rc, probs); + rc->range = range; + // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2); + LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2); + return; + } + sym -= kLenNumLowSymbols; + } + + // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym); + { + unsigned m; + unsigned bit; + RC_BIT_0(rc, probs); + probs += (posState << (1 + kLenNumLowBits)); + bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit; + bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; + bit = sym & 1; RC_BIT(rc, probs + m, bit); + rc->range = range; + } +} + +static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices) +{ + unsigned i; + for (i = 0; i < 8; i += 2) + { + UInt32 price = startPrice; + UInt32 prob; + price += GET_PRICEa(probs[1 ], (i >> 2)); + price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1); + prob = probs[4 + (i >> 1)]; + prices[i ] = price + GET_PRICEa_0(prob); + prices[i + 1] = price + GET_PRICEa_1(prob); + } +} + + +MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( + CLenPriceEnc *p, + unsigned numPosStates, + const CLenEnc *enc, + const CProbPrice *ProbPrices) +{ + UInt32 b; + + { + unsigned prob = enc->low[0]; + UInt32 a, c; + unsigned posState; + b = GET_PRICEa_1(prob); + a = GET_PRICEa_0(prob); + c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]); + for (posState = 0; posState < numPosStates; posState++) + { + UInt32 *prices = p->prices[posState]; + const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits)); + SetPrices_3(probs, a, prices, ProbPrices); + SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices); + } + } + + /* + { + unsigned i; + UInt32 b; + a = GET_PRICEa_0(enc->low[0]); + for (i = 0; i < kLenNumLowSymbols; i++) + p->prices2[i] = a; + a = GET_PRICEa_1(enc->low[0]); + b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]); + for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++) + p->prices2[i] = b; + a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); + } + */ + + // p->counter = numSymbols; + // p->counter = 64; + + { + unsigned i = p->tableSize; + + if (i > kLenNumLowSymbols * 2) + { + const CLzmaProb *probs = enc->high; + UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2; + i -= kLenNumLowSymbols * 2 - 1; + i >>= 1; + b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); + do + { + /* + p->prices2[i] = a + + // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices); + LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices); + */ + // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices); + unsigned sym = --i + (1 << (kLenNumHighBits - 1)); + UInt32 price = b; + do + { + unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[sym], bit); + } + while (sym >= 2); + + { + unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; + prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob); + prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob); + } + } + while (i); + + { + unsigned posState; + size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); + for (posState = 1; posState < numPosStates; posState++) + memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num); + } + } + } +} + +/* + #ifdef SHOW_STAT + g_STAT_OFFSET += num; + printf("\n MovePos %u", num); + #endif +*/ + +#define MOVE_POS(p, num) { \ + p->additionalOffset += (num); \ + p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); } + + +#define MARK_LIT ((UInt32)(Int32)-1) + +#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; } +#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; } +#define IsShortRep(p) ((p)->dist == 0) + + +#define GetPrice_ShortRep(p, state, posState) \ + ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState])) + +#define GetPrice_Rep_0(p, state, posState) ( \ + GET_PRICE_1(p->isMatch[state][posState]) \ + + GET_PRICE_1(p->isRep0Long[state][posState])) \ + + GET_PRICE_1(p->isRep[state]) \ + + GET_PRICE_0(p->isRepG0[state]) + +MY_FORCE_INLINE +static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) +{ + UInt32 price; + UInt32 prob = p->isRepG0[state]; + if (repIndex == 0) + { + price = GET_PRICE_0(prob); + price += GET_PRICE_1(p->isRep0Long[state][posState]); + } + else + { + price = GET_PRICE_1(prob); + prob = p->isRepG1[state]; + if (repIndex == 1) + price += GET_PRICE_0(prob); + else + { + price += GET_PRICE_1(prob); + price += GET_PRICE(p->isRepG2[state], repIndex - 2); + } + } + return price; +} + + +static SRes CheckErrors(CLzmaEnc *p) +{ + if (p->result != SZ_OK) + return p->result; + if (p->rc.res != SZ_OK) + p->result = SZ_ERROR_WRITE; + if (p->matchFinderBase.result != SZ_OK) + p->result = SZ_ERROR_READ; + if (p->result != SZ_OK) + p->finished = True; + return p->result; +} + + +MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) +{ + unsigned i; + const CProbPrice *ProbPrices = p->ProbPrices; + const CLzmaProb *probs = p->posAlignEncoder; + // p->alignPriceCount = 0; + for (i = 0; i < kAlignTableSize / 2; i++) + { + UInt32 price = 0; + unsigned sym = i; + unsigned m = 1; + unsigned bit; + UInt32 prob; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + prob = probs[m]; + p->alignPrices[i ] = price + GET_PRICEa_0(prob); + p->alignPrices[i + 8] = price + GET_PRICEa_1(prob); + // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + } +} + + +MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) +{ + // int y; for (y = 0; y < 100; y++) { + + UInt32 tempPrices[kNumFullDistances]; + unsigned i, lps; + + const CProbPrice *ProbPrices = p->ProbPrices; + p->matchPriceCount = 0; + + for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++) + { + unsigned posSlot = GetPosSlot1(i); + unsigned footerBits = (posSlot >> 1) - 1; + unsigned base = ((2 | (posSlot & 1)) << footerBits); + const CLzmaProb *probs = p->posEncoders + (size_t)base * 2; + // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices); + UInt32 price = 0; + unsigned m = 1; + unsigned sym = i; + unsigned offset = (unsigned)1 << footerBits; + base += i; + + if (footerBits) + do + { + unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[m], bit); + m = (m << 1) + bit; + } + while (--footerBits); + + { + unsigned prob = probs[m]; + tempPrices[base ] = price + GET_PRICEa_0(prob); + tempPrices[base + offset] = price + GET_PRICEa_1(prob); + } + } + + for (lps = 0; lps < kNumLenToPosStates; lps++) + { + unsigned slot; + unsigned distTableSize2 = (p->distTableSize + 1) >> 1; + UInt32 *posSlotPrices = p->posSlotPrices[lps]; + const CLzmaProb *probs = p->posSlotEncoder[lps]; + + for (slot = 0; slot < distTableSize2; slot++) + { + // posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices); + UInt32 price; + unsigned bit; + unsigned sym = slot + (1 << (kNumPosSlotBits - 1)); + unsigned prob; + bit = sym & 1; sym >>= 1; price = GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))]; + posSlotPrices[(size_t)slot * 2 ] = price + GET_PRICEa_0(prob); + posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob); + } + + { + UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits); + for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++) + { + posSlotPrices[(size_t)slot * 2 ] += delta; + posSlotPrices[(size_t)slot * 2 + 1] += delta; + delta += ((UInt32)1 << kNumBitPriceShiftBits); + } + } + + { + UInt32 *dp = p->distancesPrices[lps]; + + dp[0] = posSlotPrices[0]; + dp[1] = posSlotPrices[1]; + dp[2] = posSlotPrices[2]; + dp[3] = posSlotPrices[3]; + + for (i = 4; i < kNumFullDistances; i += 2) + { + UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)]; + dp[i ] = slotPrice + tempPrices[i]; + dp[i + 1] = slotPrice + tempPrices[i + 1]; + } + } + } + // } +} + + + +void LzmaEnc_Construct(CLzmaEnc *p) +{ + RangeEnc_Construct(&p->rc); + MatchFinder_Construct(&p->matchFinderBase); + + #ifndef _7ZIP_ST + MatchFinderMt_Construct(&p->matchFinderMt); + p->matchFinderMt.MatchFinder = &p->matchFinderBase; + #endif + + { + CLzmaEncProps props; + LzmaEncProps_Init(&props); + LzmaEnc_SetProps(p, &props); + } + + #ifndef LZMA_LOG_BSR + LzmaEnc_FastPosInit(p->g_FastPos); + #endif + + LzmaEnc_InitPriceTables(p->ProbPrices); + p->litProbs = NULL; + p->saveState.litProbs = NULL; + +} + +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) +{ + void *p; + p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc)); + if (p) + LzmaEnc_Construct((CLzmaEnc *)p); + return p; +} + +void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->litProbs); + ISzAlloc_Free(alloc, p->saveState.litProbs); + p->litProbs = NULL; + p->saveState.litProbs = NULL; +} + +void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + #ifndef _7ZIP_ST + MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); + #endif + + MatchFinder_Free(&p->matchFinderBase, allocBig); + LzmaEnc_FreeLits(p, alloc); + RangeEnc_Free(&p->rc, alloc); +} + +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); + ISzAlloc_Free(alloc, p); +} + + +#define kBigHashDicLimit ((UInt32)1 << 24) + +static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + UInt32 beforeSize = kNumOpts; + if (!RangeEnc_Alloc(&p->rc, alloc)) + return SZ_ERROR_MEM; + + #ifndef _7ZIP_ST + p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0)); + #endif + + { + unsigned lclp = p->lc + p->lp; + if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) + { + LzmaEnc_FreeLits(p, alloc); + p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + if (!p->litProbs || !p->saveState.litProbs) + { + LzmaEnc_FreeLits(p, alloc); + return SZ_ERROR_MEM; + } + p->lclp = lclp; + } + } + + p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0); + + if (beforeSize + p->dictSize < keepWindowSize) + beforeSize = keepWindowSize - p->dictSize; + + #ifndef _7ZIP_ST + if (p->mtMode) + { + RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, + LZMA_MATCH_LEN_MAX + + 1 /* 18.04 */ + , allocBig)); + p->matchFinderObj = &p->matchFinderMt; + p->matchFinderBase.bigHash = (Byte)( + (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0); + MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); + } + else + #endif + { + if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) + return SZ_ERROR_MEM; + p->matchFinderObj = &p->matchFinderBase; + MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); + } + + return SZ_OK; +} + +void LzmaEnc_Init(CLzmaEnc *p) +{ + unsigned i; + p->state = 0; + p->reps[0] = + p->reps[1] = + p->reps[2] = + p->reps[3] = 1; + + RangeEnc_Init(&p->rc); + + for (i = 0; i < (1 << kNumAlignBits); i++) + p->posAlignEncoder[i] = kProbInitValue; + + for (i = 0; i < kNumStates; i++) + { + unsigned j; + for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) + { + p->isMatch[i][j] = kProbInitValue; + p->isRep0Long[i][j] = kProbInitValue; + } + p->isRep[i] = kProbInitValue; + p->isRepG0[i] = kProbInitValue; + p->isRepG1[i] = kProbInitValue; + p->isRepG2[i] = kProbInitValue; + } + + { + for (i = 0; i < kNumLenToPosStates; i++) + { + CLzmaProb *probs = p->posSlotEncoder[i]; + unsigned j; + for (j = 0; j < (1 << kNumPosSlotBits); j++) + probs[j] = kProbInitValue; + } + } + { + for (i = 0; i < kNumFullDistances; i++) + p->posEncoders[i] = kProbInitValue; + } + + { + UInt32 num = (UInt32)0x300 << (p->lp + p->lc); + UInt32 k; + CLzmaProb *probs = p->litProbs; + for (k = 0; k < num; k++) + probs[k] = kProbInitValue; + } + + + LenEnc_Init(&p->lenProbs); + LenEnc_Init(&p->repLenProbs); + + p->optEnd = 0; + p->optCur = 0; + + { + for (i = 0; i < kNumOpts; i++) + p->opt[i].price = kInfinityPrice; + } + + p->additionalOffset = 0; + + p->pbMask = (1 << p->pb) - 1; + p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); +} + + +void LzmaEnc_InitPrices(CLzmaEnc *p) +{ + if (!p->fastMode) + { + FillDistancesPrices(p); + FillAlignPrices(p); + } + + p->lenEnc.tableSize = + p->repLenEnc.tableSize = + p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; + + p->repLenEncCounter = REP_LEN_COUNT; + + LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); +} + +typedef struct +{ + ISeqOutStream vt; + Byte *data; + SizeT rem; + BoolInt overflow; +} CLzmaEnc_SeqOutStreamBuf; + +static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) +{ + CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); + if (p->rem < size) + { + size = p->rem; + p->overflow = True; + } + memcpy(p->data, data, size); + p->rem -= size; + p->data += size; + return size; +} + + +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); +} + + +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; +} + + +SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + unsigned i; + UInt32 dictSize = p->dictSize; + if (*size < LZMA_PROPS_SIZE) + return SZ_ERROR_PARAM; + *size = LZMA_PROPS_SIZE; + props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); + + if (dictSize >= ((UInt32)1 << 22)) + { + UInt32 kDictMask = ((UInt32)1 << 20) - 1; + if (dictSize < (UInt32)0xFFFFFFFF - kDictMask) + dictSize = (dictSize + kDictMask) & ~kDictMask; + } + else for (i = 11; i <= 30; i++) + { + if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; } + if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; } + } + + for (i = 0; i < 4; i++) + props[1 + i] = (Byte)(dictSize >> (8 * i)); + return SZ_OK; +} + + + + diff --git a/cores/saturn/deps/zlib/README b/cores/saturn/deps/zlib/README new file mode 100644 index 000000000..51106de47 --- /dev/null +++ b/cores/saturn/deps/zlib/README @@ -0,0 +1,115 @@ +ZLIB DATA COMPRESSION LIBRARY + +zlib 1.2.11 is a general purpose data compression library. All the code is +thread safe. The data format used by the zlib library is described by RFCs +(Request for Comments) 1950 to 1952 in the files +http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and +rfc1952 (gzip format). + +All functions of the compression library are documented in the file zlib.h +(volunteer to write man pages welcome, contact zlib@gzip.org). A usage example +of the library is given in the file test/example.c which also tests that +the library is working correctly. Another example is given in the file +test/minigzip.c. The compression library itself is composed of all source +files in the root directory. + +To compile all files and run the test program, follow the instructions given at +the top of Makefile.in. In short "./configure; make test", and if that goes +well, "make install" should work for most flavors of Unix. For Windows, use +one of the special makefiles in win32/ or contrib/vstudio/ . For VMS, use +make_vms.com. + +Questions about zlib should be sent to , or to Gilles Vollant + for the Windows DLL version. The zlib home page is +http://zlib.net/ . Before reporting a problem, please check this site to +verify that you have the latest version of zlib; otherwise get the latest +version and check whether the problem still exists or not. + +PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help. + +Mark Nelson wrote an article about zlib for the Jan. 1997 +issue of Dr. Dobb's Journal; a copy of the article is available at +http://marknelson.us/1997/01/01/zlib-engine/ . + +The changes made in version 1.2.11 are documented in the file ChangeLog. + +Unsupported third party contributions are provided in directory contrib/ . + +zlib is available in Java using the java.util.zip package, documented at +http://java.sun.com/developer/technicalArticles/Programming/compression/ . + +A Perl interface to zlib written by Paul Marquess is available +at CPAN (Comprehensive Perl Archive Network) sites, including +http://search.cpan.org/~pmqs/IO-Compress-Zlib/ . + +A Python interface to zlib written by A.M. Kuchling is +available in Python 1.5 and later versions, see +http://docs.python.org/library/zlib.html . + +zlib is built into tcl: http://wiki.tcl.tk/4610 . + +An experimental package to read and write files in .zip format, written on top +of zlib by Gilles Vollant , is available in the +contrib/minizip directory of zlib. + + +Notes for some targets: + +- For Windows DLL versions, please see win32/DLL_FAQ.txt + +- For 64-bit Irix, deflate.c must be compiled without any optimization. With + -O, one libpng test fails. The test works in 32 bit mode (with the -n32 + compiler flag). The compiler bug has been reported to SGI. + +- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 it works + when compiled with cc. + +- On Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 is + necessary to get gzprintf working correctly. This is done by configure. + +- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with + other compilers. Use "make test" to check your compiler. + +- gzdopen is not supported on RISCOS or BEOS. + +- For PalmOs, see http://palmzlib.sourceforge.net/ + + +Acknowledgments: + + The deflate format used by zlib was defined by Phil Katz. The deflate and + zlib specifications were written by L. Peter Deutsch. Thanks to all the + people who reported problems and suggested various improvements in zlib; they + are too numerous to cite here. + +Copyright notice: + + (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +If you use the zlib library in a product, we would appreciate *not* receiving +lengthy legal documents to sign. The sources are provided for free but without +warranty of any kind. The library has been entirely written by Jean-loup +Gailly and Mark Adler; it does not include third-party code. + +If you redistribute modified sources, we would appreciate that you include in +the file ChangeLog history information documenting your changes. Please read +the FAQ for more information on the distribution of modified source versions. diff --git a/cores/saturn/deps/zlib/adler32.c b/cores/saturn/deps/zlib/adler32.c new file mode 100644 index 000000000..87ce4c1b7 --- /dev/null +++ b/cores/saturn/deps/zlib/adler32.c @@ -0,0 +1,102 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" + +#define BASE 65521U /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +#define MOD(a) a %= BASE +#define MOD28(a) a %= BASE +#define MOD63(a) a %= BASE + +/* ========================================================================= */ +uLong ZEXPORT adler32_z(adler, buf, len) + uLong adler; + const Bytef *buf; + z_size_t len; +{ + unsigned long sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD28(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; + } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len >= 16) { + len -= 16; + DO16(buf); + buf += 16; + } + while (len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + return adler32_z(adler, buf, len); +} diff --git a/cores/saturn/deps/zlib/crc32.c b/cores/saturn/deps/zlib/crc32.c new file mode 100644 index 000000000..30e0fdf86 --- /dev/null +++ b/cores/saturn/deps/zlib/crc32.c @@ -0,0 +1,46 @@ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Thanks to Rodney Brown for his contribution of faster + * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing + * tables for updating the shift register in one step with three exclusive-ors + * instead of four steps with four exclusive-ors. This results in about a + * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. + */ + +/* @(#) $Id$ */ + +#include "zutil.h" /* for STDC and FAR definitions */ + +/* Definitions for doing the crc four data bytes at a time. */ +#define TBLS 1 + +/* ======================================================================== + * Tables of CRC-32s of all single-byte values, made by make_crc_table(). + */ +#include "crc32.h" + +/* ========================================================================= */ +#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) +#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + +/* ========================================================================= */ +unsigned long ZEXPORT crc32(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + uInt len; +{ + if (buf == Z_NULL) + return 0UL; + + crc = crc ^ 0xffffffffUL; + while (len >= 8) { + DO8; + len -= 8; + } + if (len) do { + DO1; + } while (--len); + return crc ^ 0xffffffffUL; +} diff --git a/cores/saturn/deps/zlib/crc32.h b/cores/saturn/deps/zlib/crc32.h new file mode 100644 index 000000000..af0e6c915 --- /dev/null +++ b/cores/saturn/deps/zlib/crc32.h @@ -0,0 +1,61 @@ +/* crc32.h -- tables for rapid CRC calculation + * Generated automatically by crc32.c + */ + +local const z_crc_t FAR crc_table[TBLS][256] = +{ + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL + } +}; diff --git a/cores/saturn/deps/zlib/inffast.c b/cores/saturn/deps/zlib/inffast.c new file mode 100644 index 000000000..546845b58 --- /dev/null +++ b/cores/saturn/deps/zlib/inffast.c @@ -0,0 +1,264 @@ +/* inffast.c -- fast decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. + */ +void ZLIB_INTERNAL inflate_fast(strm, start) +z_streamp strm; +unsigned start; /* inflate()'s starting value for strm->avail_out */ +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *in; /* local strm->next_in */ + z_const unsigned char FAR *last; /* have enough input while in < last */ + unsigned char FAR *out; /* local strm->next_out */ + unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ + unsigned char FAR *end; /* while out < end, enough space available */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ + unsigned long hold; /* local strm->hold */ + unsigned bits; /* local strm->bits */ + code const FAR *lcode; /* local strm->lencode */ + code const FAR *dcode; /* local strm->distcode */ + unsigned lmask; /* mask for first level of length codes */ + unsigned dmask; /* mask for first level of distance codes */ + code here; /* retrieved table entry */ + unsigned op; /* code bits, operation, extra bits, or */ + /* window position, window bytes to copy */ + unsigned len; /* match length, unused bytes */ + unsigned dist; /* match distance */ + unsigned char FAR *from; /* where to copy match from */ + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + in = strm->next_in; + last = in + (strm->avail_in - 5); + out = strm->next_out; + beg = out - (start - strm->avail_out); + end = out + (strm->avail_out - 257); + wsize = state->wsize; + whave = state->whave; + wnext = state->wnext; + window = state->window; + hold = state->hold; + bits = state->bits; + lcode = state->lencode; + dcode = state->distcode; + lmask = (1U << state->lenbits) - 1; + dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + do { + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = lcode[hold & lmask]; + dolen: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op == 0) { /* literal */ + *out++ = (unsigned char)(here.val); + } + else if (op & 16) { /* length base */ + len = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (op) { + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + len += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + } + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = dcode[hold & dmask]; + dodist: + op = (unsigned)(here.bits); + hold >>= op; + bits -= op; + op = (unsigned)(here.op); + if (op & 16) { /* distance base */ + dist = (unsigned)(here.val); + op &= 15; /* number of extra bits */ + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + } + dist += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + op = (unsigned)(out - beg); /* max distance in output */ + if (dist > op) { /* see if copy from window */ + op = dist - op; /* distance back in window */ + if (op > whave) { + if (state->sane) { + strm->msg = + (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + } + from = window; + if (wnext == 0) { /* very common case */ + from += wsize - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + else if (wnext < op) { /* wrap around window */ + from += wsize + wnext - op; + op -= wnext; + if (op < len) { /* some from end of window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = window; + if (wnext < len) { /* some from start of window */ + op = wnext; + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + } + else { /* contiguous in window */ + from += wnext - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + while (len > 2) { + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + else { + from = out - dist; /* copy direct from output */ + do { /* minimum length is three */ + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } while (len > 2); + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + } + else if ((op & 64) == 0) { /* 2nd level distance code */ + here = dcode[here.val + (hold & ((1U << op) - 1))]; + goto dodist; + } + else { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + } + else if ((op & 64) == 0) { /* 2nd level length code */ + here = lcode[here.val + (hold & ((1U << op) - 1))]; + goto dolen; + } + else if (op & 32) { /* end-of-block */ + state->mode = TYPE; + break; + } + else { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + } while (in < last && out < end); + + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + len = bits >> 3; + in -= len; + bits -= len << 3; + hold &= (1U << bits) - 1; + + /* update state and return */ + strm->next_in = in; + strm->next_out = out; + strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); + strm->avail_out = (unsigned)(out < end ? + 257 + (end - out) : 257 - (out - end)); + state->hold = hold; + state->bits = bits; +} diff --git a/cores/saturn/deps/zlib/inffast.h b/cores/saturn/deps/zlib/inffast.h new file mode 100644 index 000000000..e5c1aa4ca --- /dev/null +++ b/cores/saturn/deps/zlib/inffast.h @@ -0,0 +1,11 @@ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-2003, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); diff --git a/cores/saturn/deps/zlib/inflate.c b/cores/saturn/deps/zlib/inflate.c new file mode 100644 index 000000000..2237f79e8 --- /dev/null +++ b/cores/saturn/deps/zlib/inflate.c @@ -0,0 +1,1050 @@ +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +/* function prototypes */ +local int inflateStateCheck OF((z_streamp strm)); +local void fixedtables OF((struct inflate_state FAR *state)); +local int updatewindow OF((z_streamp strm, const unsigned char FAR *end, + unsigned copy)); + +local int inflateStateCheck(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + state = (struct inflate_state FAR *)strm->state; + if (state == Z_NULL || state->strm != strm || + state->mode < HEAD || state->mode > SYNC) + return 1; + return 0; +} + +int ZEXPORT inflateResetKeep(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + strm->total_in = strm->total_out = state->total = 0; + strm->msg = Z_NULL; + if (state->wrap) /* to support ill-conceived Java test suite */ + strm->adler = state->wrap & 1; + state->mode = HEAD; + state->last = 0; + state->havedict = 0; + state->head = Z_NULL; + state->hold = 0; + state->bits = 0; + state->lencode = state->distcode = state->next = state->codes; + state->sane = 1; + state->back = -1; + return Z_OK; +} + +int ZEXPORT inflateReset(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + state->wsize = 0; + state->whave = 0; + state->wnext = 0; + return inflateResetKeep(strm); +} + +int ZEXPORT inflateReset2(strm, windowBits) +z_streamp strm; +int windowBits; +{ + int wrap; + struct inflate_state FAR *state; + + /* get the state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* extract wrap request from windowBits parameter */ + if (windowBits < 0) { + wrap = 0; + windowBits = -windowBits; + } + else { + wrap = (windowBits >> 4) + 5; +#ifdef GUNZIP + if (windowBits < 48) + windowBits &= 15; +#endif + } + + /* set number of window bits, free window if different */ + if (windowBits && (windowBits < 8 || windowBits > 15)) + return Z_STREAM_ERROR; + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { + ZFREE(strm, state->window); + state->window = Z_NULL; + } + + /* update state and reset the rest of it */ + state->wrap = wrap; + state->wbits = (unsigned)windowBits; + return inflateReset(strm); +} + +int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size) +z_streamp strm; +int windowBits; +const char *version; +int stream_size; +{ + int ret; + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL) return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { + return Z_STREAM_ERROR; + } + if (strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *) + ZALLOC(strm, 1, sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)state; + state->strm = strm; + state->window = Z_NULL; + state->mode = HEAD; /* to pass state test in inflateReset2() */ + ret = inflateReset2(strm, windowBits); + if (ret != Z_OK) { + ZFREE(strm, state); + strm->state = Z_NULL; + } + return ret; +} + +int ZEXPORT inflateInit_(strm, version, stream_size) +z_streamp strm; +const char *version; +int stream_size; +{ + return inflateInit2_(strm, DEF_WBITS, version, stream_size); +} + +local void fixedtables(state) +struct inflate_state FAR *state; +{ + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +/* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. This is only called + when a window is already in use, or when output has been written during this + inflate call, but the end of the deflate stream has not been reached yet. + It is also called to create a window for dictionary data when a dictionary + is loaded. + + Providing output buffers larger than 32K to inflate() should provide a speed + advantage, since only the last 32K of output is copied to the sliding window + upon return from inflate(), and since all distances after the first 32K of + output will fall in the output data, making match copies simpler and faster. + The advantage may be dependent on the size of the processor's data caches. + */ +local int updatewindow(strm, end, copy) +z_streamp strm; +const Bytef *end; +unsigned copy; +{ + struct inflate_state FAR *state; + unsigned dist; + + state = (struct inflate_state FAR *)strm->state; + + /* if it hasn't been done already, allocate space for the window */ + if (state->window == Z_NULL) { + state->window = (unsigned char FAR *) + ZALLOC(strm, 1U << state->wbits, + sizeof(unsigned char)); + if (state->window == Z_NULL) return 1; + } + + /* if window not in use yet, initialize */ + if (state->wsize == 0) { + state->wsize = 1U << state->wbits; + state->wnext = 0; + state->whave = 0; + } + + /* copy state->wsize or less output bytes into the circular window */ + if (copy >= state->wsize) { + zmemcpy(state->window, end - state->wsize, state->wsize); + state->wnext = 0; + state->whave = state->wsize; + } + else { + dist = state->wsize - state->wnext; + if (dist > copy) dist = copy; + zmemcpy(state->window + state->wnext, end - copy, dist); + copy -= dist; + if (copy) { + zmemcpy(state->window, end - copy, copy); + state->wnext = copy; + state->whave = state->wsize; + } + else { + state->wnext += dist; + if (state->wnext == state->wsize) state->wnext = 0; + if (state->whave < state->wsize) state->whave += dist; + } + } + return 0; +} + +/* Macros for inflate(): */ + +/* check function to use adler32() for zlib or crc32() for gzip */ +#ifdef GUNZIP +# define UPDATE(check, buf, len) \ + (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) +#else +# define UPDATE(check, buf, len) adler32(check, buf, len) +#endif + +/* check macros for header crc */ +#ifdef GUNZIP +# define CRC2(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + check = crc32(check, hbuf, 2); \ + } while (0) + +# define CRC4(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + hbuf[2] = (unsigned char)((word) >> 16); \ + hbuf[3] = (unsigned char)((word) >> 24); \ + check = crc32(check, hbuf, 4); \ + } while (0) +#endif + +/* Load registers with state in inflate() for speed */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Restore state from registers in inflate() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflate() + if there is no input available. */ +#define PULLBYTE() \ + do { \ + if (have == 0) goto inf_leave; \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflate(). */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* + inflate() uses a state machine to process as much input data and generate as + much output data as possible before returning. The state machine is + structured roughly as follows: + + for (;;) switch (state) { + ... + case STATEn: + if (not enough input data or output space to make progress) + return; + ... make progress ... + state = STATEm; + break; + ... + } + + so when inflate() is called again, the same case is attempted again, and + if the appropriate resources are provided, the machine proceeds to the + next state. The NEEDBITS() macro is usually the way the state evaluates + whether it can proceed or should return. NEEDBITS() does the return if + the requested bits are not available. The typical use of the BITS macros + is: + + NEEDBITS(n); + ... do something with BITS(n) ... + DROPBITS(n); + + where NEEDBITS(n) either returns from inflate() if there isn't enough + input left to load n bits into the accumulator, or it continues. BITS(n) + gives the low n bits in the accumulator. When done, DROPBITS(n) drops + the low n bits off the accumulator. INITBITS() clears the accumulator + and sets the number of available bits to zero. BYTEBITS() discards just + enough bits to put the accumulator on a byte boundary. After BYTEBITS() + and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. + + NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return + if there is no input available. The decoding of variable length codes uses + PULLBYTE() directly in order to pull just enough bytes to decode the next + code, and no more. + + Some states loop until they get enough input, making sure that enough + state information is maintained to continue the loop where it left off + if NEEDBITS() returns in the loop. For example, want, need, and keep + would all have to actually be part of the saved state in case NEEDBITS() + returns: + + case STATEw: + while (want < need) { + NEEDBITS(n); + keep[want++] = BITS(n); + DROPBITS(n); + } + state = STATEx; + case STATEx: + + As shown above, if the next state is also the next case, then the break + is omitted. + + A state may also return if there is not enough output space available to + complete that state. Those states are copying stored data, writing a + literal byte, and copying a matching string. + + When returning, a "goto inf_leave" is used to update the total counters, + update the check value, and determine whether any progress has been made + during that inflate() call in order to return the proper return code. + Progress is defined as a change in either strm->avail_in or strm->avail_out. + When there is a window, goto inf_leave will update the window with the last + output written. If a goto inf_leave occurs in the middle of decompression + and there is no window currently, goto inf_leave will create one and copy + output to the window for the next call of inflate(). + + In this implementation, the flush parameter of inflate() only affects the + return code (per zlib.h). inflate() always writes as much as possible to + strm->next_out, given the space available and the provided input--the effect + documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream available. So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream. + */ + +int ZEXPORT inflate(strm, flush) +z_streamp strm; +int flush; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ +#ifdef GUNZIP + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ +#endif + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + if (inflateStateCheck(strm) || strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state FAR *)strm->state; + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); +#ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + if (state->wbits == 0) + state->wbits = 15; + state->check = crc32(0L, Z_NULL, 0); + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; + break; + } + state->flags = 0; /* expect zlib header */ + if (state->head != Z_NULL) + state->head->done = -1; + if (!(state->wrap & 1) || /* check if zlib header allowed */ +#else + if ( +#endif + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; + state->mode = BAD; + break; + } + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + DROPBITS(4); + len = BITS(4) + 8; + if (state->wbits == 0) + state->wbits = len; + if (len > 15 || len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = hold & 0x200 ? DICTID : TYPE; + INITBITS(); + break; +#ifdef GUNZIP + case FLAGS: + NEEDBITS(16); + state->flags = (int)(hold); + if ((state->flags & 0xff) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + if (state->flags & 0xe000) { + strm->msg = (char *)"unknown header flags set"; + state->mode = BAD; + break; + } + if (state->head != Z_NULL) + state->head->text = (int)((hold >> 8) & 1); + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; + case TIME: + NEEDBITS(32); + if (state->head != Z_NULL) + state->head->time = hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC4(state->check, hold); + INITBITS(); + state->mode = OS; + case OS: + NEEDBITS(16); + if (state->head != Z_NULL) { + state->head->xflags = (int)(hold & 0xff); + state->head->os = (int)(hold >> 8); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; + case EXLEN: + if (state->flags & 0x0400) { + NEEDBITS(16); + state->length = (unsigned)(hold); + if (state->head != Z_NULL) + state->head->extra_len = (unsigned)hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + } + else if (state->head != Z_NULL) + state->head->extra = Z_NULL; + state->mode = EXTRA; + case EXTRA: + if (state->flags & 0x0400) { + copy = state->length; + if (copy > have) copy = have; + if (copy) { + if (state->head != Z_NULL && + state->head->extra != Z_NULL) { + len = state->head->extra_len - state->length; + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? + state->head->extra_max - len : copy); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + state->length -= copy; + } + if (state->length) goto inf_leave; + } + state->length = 0; + state->mode = NAME; + case NAME: + if (state->flags & 0x0800) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->name != Z_NULL && + state->length < state->head->name_max) + state->head->name[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->name = Z_NULL; + state->length = 0; + state->mode = COMMENT; + case COMMENT: + if (state->flags & 0x1000) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->comment != Z_NULL && + state->length < state->head->comm_max) + state->head->comment[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->comment = Z_NULL; + state->mode = HCRC; + case HCRC: + if (state->flags & 0x0200) { + NEEDBITS(16); + if ((state->wrap & 4) && hold != (state->check & 0xffff)) { + strm->msg = (char *)"header crc mismatch"; + state->mode = BAD; + break; + } + INITBITS(); + } + if (state->head != Z_NULL) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + state->head->done = 1; + } + strm->adler = state->check = crc32(0L, Z_NULL, 0); + state->mode = TYPE; + break; +#endif + case DICTID: + NEEDBITS(32); + strm->adler = state->check = ZSWAP32(hold); + INITBITS(); + state->mode = DICT; + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; + } + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = TYPE; + case TYPE: + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + state->mode = LEN_; /* decode codes */ + if (flush == Z_TREES) { + DROPBITS(2); + goto inf_leave; + } + break; + case 2: /* dynamic block */ + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + INITBITS(); + state->mode = COPY_; + if (flush == Z_TREES) goto inf_leave; + case COPY_: + state->mode = COPY; + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + state->have = 0; + state->mode = LENLENS; + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + state->have = 0; + state->mode = CODELENS; + case CODELENS: + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (const code FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + state->mode = LEN_; + if (flush == Z_TREES) goto inf_leave; + case LEN_: + state->mode = LEN; + case LEN: + if (have >= 6 && left >= 258) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + if (state->mode == TYPE) + state->back = -1; + break; + } + state->back = 0; + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + state->length = (unsigned)here.val; + if ((int)(here.op) == 0) { + state->mode = LIT; + break; + } + if (here.op & 32) { + state->back = -1; + state->mode = TYPE; + break; + } + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(here.op) & 15; + state->mode = LENEXT; + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } + state->was = state->length; + state->mode = DIST; + case DIST: + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + state->extra = (unsigned)(here.op) & 15; + state->mode = DISTEXT; + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } + state->mode = MATCH; + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->whave) { + if (state->sane) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + } + if (copy > state->wnext) { + copy -= state->wnext; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->wnext - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = *from++; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + case CHECK: + if (state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, put - out, out); + out = left; + if ((state->wrap & 4) && ( +#ifdef GUNZIP + state->flags ? hold : +#endif + ZSWAP32(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + } +#ifdef GUNZIP + state->mode = LENGTH; + case LENGTH: + if (state->wrap && state->flags) { + NEEDBITS(32); + if (hold != (state->total & 0xffffffffUL)) { + strm->msg = (char *)"incorrect length check"; + state->mode = BAD; + break; + } + INITBITS(); + } +#endif + state->mode = DONE; + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. Call updatewindow() to create and/or update the window state. + Note: a memory error from inflate() is non-recoverable. + */ + inf_leave: + RESTORE(); + if (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, strm->next_out - out, out); + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0) + + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + return ret; +} + +int ZEXPORT inflateEnd(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->window != Z_NULL) ZFREE(strm, state->window); + ZFREE(strm, strm->state); + strm->state = Z_NULL; + return Z_OK; +} diff --git a/cores/saturn/deps/zlib/inflate.h b/cores/saturn/deps/zlib/inflate.h new file mode 100644 index 000000000..dc2402a29 --- /dev/null +++ b/cores/saturn/deps/zlib/inflate.h @@ -0,0 +1,124 @@ +/* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer decoding by inflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip decoding + should be left enabled. */ +#ifndef NO_GZIP +# define GUNZIP +#endif + +/* Possible inflate modes between inflate() calls */ +typedef enum { + HEAD = 16180, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY_, /* i/o: same as COPY below, but only first time in */ + COPY, /* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN_, /* i: same as LEN below, but only first time in */ + LEN, /* i: waiting for length/lit/eob code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ +} inflate_mode; + +/* + State transitions between above modes - + + (most modes can go to BAD or MEM on error -- not shown for clarity) + + Process header: + HEAD -> (gzip) or (zlib) or (raw) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> + HCRC -> TYPE + (zlib) -> DICTID or TYPE + DICTID -> DICT -> TYPE + (raw) -> TYPEDO + Read deflate blocks: + TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK + STORED -> COPY_ -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN_ + LEN_ -> LEN + Read deflate codes in fixed or dynamic block: + LEN -> LENEXT or LIT or TYPE + LENEXT -> DIST -> DISTEXT -> MATCH -> LEN + LIT -> LEN + Process trailer: + CHECK -> LENGTH -> DONE + */ + +/* State maintained between inflate() calls -- approximately 7K bytes, not + including the allocated sliding window, which is up to 32K bytes. */ +struct inflate_state { + z_streamp strm; /* pointer back to this zlib stream */ + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip, + bit 2 true to validate check value */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags (0 if zlib) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + gz_headerp head; /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ + int sane; /* if false, allow invalid distance too far */ + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ +}; diff --git a/cores/saturn/deps/zlib/inftrees.c b/cores/saturn/deps/zlib/inftrees.c new file mode 100644 index 000000000..2ea08fc13 --- /dev/null +++ b/cores/saturn/deps/zlib/inftrees.c @@ -0,0 +1,304 @@ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" + +#define MAXBITS 15 + +const char inflate_copyright[] = + " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. + */ +int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) +codetype type; +unsigned short FAR *lens; +unsigned codes; +code FAR * FAR *table; +unsigned FAR *bits; +unsigned short FAR *work; +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code here; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + unsigned match; /* use base and extra for symbol >= match */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. + + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. + */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)1; + here.val = (unsigned short)0; + *(*table)++ = here; /* make a table to force an error */ + *(*table)++ = here; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ + } + for (min = 1; min < max; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked for LENS and DIST tables against + the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in + the initial root table size constants. See the comments in inftrees.h + for more information. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. + */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + match = 20; + break; + case LENS: + base = lbase; + extra = lext; + match = 257; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + match = 0; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + here.bits = (unsigned char)(len - drop); + if (work[sym] + 1U < match) { + here.op = (unsigned char)0; + here.val = work[sym]; + } + else if (work[sym] >= match) { + here.op = (unsigned char)(extra[work[sym] - match]); + here.val = base[work[sym] - match]; + } + else { + here.op = (unsigned char)(32 + 64); /* end of block */ + here.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = here; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* fill in remaining table entry if code is incomplete (guaranteed to have + at most one remaining entry, since if the code is incomplete, the + maximum code length that was allowed to get this far is one bit) */ + if (huff != 0) { + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)(len - drop); + here.val = (unsigned short)0; + next[huff] = here; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} diff --git a/cores/saturn/deps/zlib/inftrees.h b/cores/saturn/deps/zlib/inftrees.h new file mode 100644 index 000000000..baa53a0b1 --- /dev/null +++ b/cores/saturn/deps/zlib/inftrees.h @@ -0,0 +1,62 @@ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low four bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 0001eeee - length or distance, eeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of the dynamic table. The maximum number of code structures is + 1444, which is the sum of 852 for literal/length codes and 592 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distribtution. The arguments to that + program are the number of symbols, the initial root table size, and the + maximum bit length of a code. "enough 286 9 15" for literal/length codes + returns returns 852, and "enough 30 6 15" for distance codes returns 592. + The initial root table size (9 or 6) is found in the fifth argument of the + inflate_table() calls in inflate.c and infback.c. If the root table size is + changed, then these maximum sizes would be need to be recalculated and + updated. */ +#define ENOUGH_LENS 852 +#define ENOUGH_DISTS 592 +#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) + +/* Type of code to build for inflate_table() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work)); diff --git a/cores/saturn/deps/zlib/zconf.h b/cores/saturn/deps/zlib/zconf.h new file mode 100644 index 000000000..9d5563e88 --- /dev/null +++ b/cores/saturn/deps/zlib/zconf.h @@ -0,0 +1,274 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +#include + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". + */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols and init macros */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_z z_adler32_z +# define crc32 z_crc32 +# define crc32_z z_crc32_z +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit z_inflateBackInit +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCodesUsed z_inflateCodesUsed +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateInit z_inflateInit +# define inflateInit2 z_inflateInit2 +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateResetKeep z_inflateResetKeep +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#define z_const const + +typedef size_t z_size_t; + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus about 7 kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +#ifndef Z_ARG /* function prototypes for stdarg */ +#define Z_ARG(args) args +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +typedef Byte FAR Bytef; +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +typedef unsigned long z_crc_t; + +#include /* for off_t */ +#include /* for va_list */ +#include /* for wchar_t */ + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#ifdef _WIN32 +#include +#else +#include /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +#endif + +#ifndef z_off_t +#define z_off_t off_t +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +#define z_off64_t off64_t +#else +#define z_off64_t z_off_t +#endif + +#endif /* ZCONF_H */ diff --git a/cores/saturn/deps/zlib/zlib.h b/cores/saturn/deps/zlib/zlib.h new file mode 100644 index 000000000..dff9e793a --- /dev/null +++ b/cores/saturn/deps/zlib/zlib.h @@ -0,0 +1,1065 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.11, January 15th, 2017 + + Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.2.11" +#define ZLIB_VERNUM 0x12b0 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 2 +#define ZLIB_VER_REVISION 11 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip and raw deflate streams in + memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in the case of corrupted input. +*/ + +typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte will go here */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text + for deflate, or the decoding state for inflate */ + uLong adler; /* Adler-32 or CRC-32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. +*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. In that case, zlib is thread-safe. When zalloc and zfree are + Z_NULL on entry to the initialization function, they are set to internal + routines that use the standard library functions malloc() and free(). + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use by the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. + */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field for deflate() */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +/* basic functions */ + +/* +ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). + + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary. Some output may be provided even if + flush is zero. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. See deflatePending(), + which can be used if desired to determine whether or not there is more ouput + in that case. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed + codes block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six to avoid repeated flush markers due to + avail_out == 0 on return. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this + function must be called again with Z_FINISH and more output space (updated + avail_out) but no more input data, until it returns with Z_STREAM_END or an + error. After deflate has returned Z_STREAM_END, the only possible operations + on the stream are deflateReset or deflateEnd. + + Z_FINISH can be used in the first deflate call after deflateInit if all the + compression is to be done in a single step. In order to complete in one + call, avail_out must be at least the value returned by deflateBound (see + below). Then deflate is guaranteed to return Z_STREAM_END. If not enough + output space is provided, deflate will not return Z_STREAM_END, and it must + be called again as described above. + + deflate() sets strm->adler to the Adler-32 checksum of all input read + so far (that is, total_in bytes). If a gzip stream is being generated, then + strm->adler will be the CRC-32 checksum of the input read so far. (See + deflateInit2 below.) + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is + considered binary. This field is only for information purposes and does not + affect the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL or the state was inadvertently written over + by the application), or Z_BUF_ERROR if no progress is possible (for example + avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and + deflate() can be called again with more input and more output space to + continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. In the current version of inflate, the provided input is not + read or consumed. The allocation of a sliding window will be deferred to + the first call of inflate (if the decompression does not complete on the + first call). If zalloc and zfree are set to Z_NULL, inflateInit updates + them to use default allocation functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression. + Actual decompression will be done by inflate(). So next_in, and avail_in, + next_out, and avail_out are unused and unchanged. The current + implementation of inflateInit() does not process any header information -- + that is deferred until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), then next_in and avail_in are updated + accordingly, and processing will resume at this point for the next call of + inflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. If the + caller of inflate() does not provide both available input and available + output space, it is possible that there will be no progress made. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + To assist in this, on return inflate() always sets strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed Adler-32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained unless inflateGetHeader() is used. When processing + gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer, as is the + uncompressed length, modulo 2^32. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value, in which case strm->msg points to a string with a more specific + error), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL, or the state was inadvertently written over + by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR + if no progress was possible or if there was not enough room in the output + buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is to be attempted. +*/ + + +ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state + was inconsistent. +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy)); + + This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by the + caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + For the current implementation of deflate(), a windowBits value of 8 (a + window size of 256 bytes) is not supported. As a result, a request for 8 + will result in 9 (a 512-byte window). In that case, providing 8 to + inflateInit2() will result in an error when the zlib header with 9 is + checked against the initialization of inflate(). The remedy is to not use 8 + with deflateInit2() with this initialization, or at least in that case use 9 + with inflateInit2(). + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute a check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to the appropriate value, + if the operating system was determined at compile time. If a gzip stream is + being written, strm->adler is a CRC-32 instead of an Adler-32. + + For raw deflate or gzip encoding, a request for a 256-byte window is + rejected as invalid, since only the zlib header provides a means of + transmitting the window size to the decompressor. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the Adler-32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The Adler-32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + Adler-32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by deflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If deflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similary, if dictLength is Z_NULL, then it is not set. + + deflateGetDictionary() may return a length less than the window size, even + when more than the window size in input has been provided. It may return up + to 258 bytes less in that case, due to how zlib's implementation of deflate + manages the sliding window and lookahead for matches, where matches can be + up to 258 bytes long. If the application needs the last window-size bytes of + input, then that would need to be saved by the application outside of zlib. + + deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, + z_streamp source)); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. + + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); +/* + This function is equivalent to deflateEnd followed by deflateInit, but + does not free and reallocate the internal compression state. The stream + will leave the compression level and any other attributes that may have been + set unchanged. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, + int level, + int strategy)); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2(). This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression approach (which is a function of the level) or the + strategy is changed, and if any input has been consumed in a previous + deflate() call, then the input available so far is compressed with the old + level and strategy using deflate(strm, Z_BLOCK). There are three approaches + for the compression levels 0, 1..3, and 4..9 respectively. The new level + and strategy will take effect at the next call of deflate(). + + If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does + not have enough output space to complete, then the parameter change will not + take effect. In this case, deflateParams() can be called again with the + same parameters and more output space to try again. + + In order to assure a change in the parameters on the first try, the + deflate stream should be flushed using deflate() with Z_BLOCK or other flush + request until strm.avail_out is not zero, before calling deflateParams(). + Then no more input data should be provided before the deflateParams() call. + If this is done, the old level and strategy will be applied to the data + compressed before deflateParams(), and the new level and strategy will be + applied to the the data compressed after deflateParams(). + + deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream + state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if + there was not enough output space to complete the compression of the + available input data before a change in the strategy or approach. Note that + in the case of a Z_BUF_ERROR, the parameters are not changed. A return + value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be + retried with more output space. +*/ + +ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain)); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, + uLong sourceLen)); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm, + unsigned *pending, + int *bits)); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having being consumed. + The number of bits of output not provided are between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, + int bits, + int value)); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, + gz_headerp head)); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to 255, with no extra, name, or comment + fields. The gzip header is returned to the default state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, + int windowBits)); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an Adler-32 or a CRC-32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see + below), inflate() will not automatically decode concatenated gzip streams. + inflate() will return Z_STREAM_END at the end of the gzip stream. The state + would need to be reset to continue decoding a subsequent gzip stream. + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dictLength)); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the Adler-32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect Adler-32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). +*/ + +ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similary, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm, + int windowBits)); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. If the window size is changed, then the + memory allocated for the window is freed, and the window will be reallocated + by inflate() if needed. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, + unsigned char FAR *window)); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. +*/ + +typedef unsigned (*in_func) OF((void FAR *, + z_const unsigned char FAR * FAR *)); +typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); + +ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +/* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is Z_NULL, this function returns the + required initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf, + z_size_t len)); +/* + Same as adler32(), but with a size_t length. +*/ + +ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. If buf is Z_NULL, this function returns the required + initial value for the crc. Pre- and post-conditioning (one's complement) is + performed within this function so it shouldn't be done by the application. + + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf, + z_size_t len)); +/* + Same as crc32(), but with a size_t length. +*/ + +/* various hacks, don't look :) */ + +/* inflateInit is a macro to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits, + const char *version, int stream_size)); +ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size)); +#ifdef Z_PREFIX_SET +# define z_inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define z_inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#else +# define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#endif + +/* undocumented functions */ +ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ diff --git a/cores/saturn/deps/zlib/zutil.h b/cores/saturn/deps/zlib/zutil.h new file mode 100644 index 000000000..848853f59 --- /dev/null +++ b/cores/saturn/deps/zlib/zutil.h @@ -0,0 +1,91 @@ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef ZUTIL_H +#define ZUTIL_H + +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + +#include "zlib.h" + +#include +#include +#include + +#ifndef local +# define local static +#endif +/* since "static" is used to mean two completely different things in C, we + define "local" for the non-static meaning of "static", for readability + (compile with -Dlocal if your debugger can't find static symbols) */ + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] + +#define ERR_RETURN(strm,err) \ + return (strm->msg = ERR_MSG(err), (err)) +/* To be used only when the state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + +/* common defaults */ +/* functions */ + +#define zmemcpy memcpy +#define zmemcmp memcmp +#define zmemzero(dest, len) memset(dest, 0, len) + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + +/* Reverse the bytes in a 32-bit value */ +#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ + (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) + +#endif /* ZUTIL_H */ diff --git a/cores/saturn/disc.cpp b/cores/saturn/disc.cpp new file mode 100644 index 000000000..439023ea6 --- /dev/null +++ b/cores/saturn/disc.cpp @@ -0,0 +1,827 @@ + +#include "libretro.h" +#include +#include + +#include "mednafen/mednafen-types.h" +#include "mednafen/git.h" +#include "mednafen/general.h" +#include "mednafen/cdrom/cdromif.h" +#include "mednafen/hash/md5.h" +#include "mednafen/hash/sha256.h" +#include "mednafen/ss/ss.h" +#include "mednafen/ss/cdb.h" +#include "mednafen/ss/smpc.h" + +// Forward declarations +extern "C"{ +RFILE* rfopen(const char *path, const char *mode); +char *rfgets(char *buffer, int maxCount, RFILE* stream); +int rfclose(RFILE* stream); +} + +//------------------------------------------------------------------------------ +// Locals +//------------------------------------------------------------------------------ + +static bool g_eject_state; + +static int g_current_disc; + +static unsigned g_initial_disc; +static std::string g_initial_disc_path; + +static std::vector CDInterfaces; + +static std::vector disk_image_paths; +static std::vector disk_image_labels; + +// +// Remember to rebuild region database in db.cpp if changing the order of +// entries in this table(and be careful about game id collisions, +// e.g. with some Korean games). +// +static const struct +{ + const char c; + const char* str; // Community-defined region string that may appear in filename. + unsigned region; +} +region_strings[] = +{ + // Listed in order of preference for multi-region games. + { 'U', "USA", SMPC_AREA_NA }, + { 'J', "Japan", SMPC_AREA_JP }, + { 'K', "Korea", SMPC_AREA_KR }, + + { 'E', "Europe", SMPC_AREA_EU_PAL }, + { 'E', "Germany", SMPC_AREA_EU_PAL }, + { 'E', "France", SMPC_AREA_EU_PAL }, + { 'E', "Spain", SMPC_AREA_EU_PAL }, + + { 'B', "Brazil", SMPC_AREA_CSA_NTSC }, + + { 'T', "Asia_NTSC", SMPC_AREA_ASIA_NTSC }, + { 'A', "Asia_PAL", SMPC_AREA_ASIA_PAL }, + { 'L', "CSA_PAL", SMPC_AREA_CSA_PAL }, +}; + + +//------------------------------------------------------------------------------ +// Local Functions +//------------------------------------------------------------------------------ + +static void ReadM3U( std::vector &file_list, std::string path, unsigned depth = 0 ) +{ + std::string dir_path; + char linebuf[ 2048 ]; + RFILE *fp = rfopen(path.c_str(), "rb"); + if (!fp) + return; + + MDFN_GetFilePathComponents(path, &dir_path); + + while(rfgets(linebuf, sizeof(linebuf), fp) != NULL) + { + std::string efp; + + if(linebuf[0] == '#') + continue; + string_trim_whitespace_right(linebuf); + if(linebuf[0] == 0) + continue; + + efp = MDFN_EvalFIP(dir_path, std::string(linebuf)); + if(efp.size() >= 4 && efp.substr(efp.size() - 4) == ".m3u") + { + if(efp == path) + { + log_cb(RETRO_LOG_ERROR, "M3U at \"%s\" references self.\n", efp.c_str()); + goto end; + } + + if(depth == 99) + { + log_cb(RETRO_LOG_ERROR, "M3U load recursion too deep!\n"); + goto end; + } + + ReadM3U(file_list, efp, depth++); + } + else + { + file_list.push_back(efp); + } + } + +end: + rfclose(fp); +} + +static bool IsSaturnDisc( const uint8* sa32k ) +{ + if(sha256(&sa32k[0x100], 0xD00) != "96b8ea48819cfa589f24c40aa149c224c420dccf38b730f00156efe25c9bbc8f"_sha256) + return false; + + if(memcmp(&sa32k[0], "SEGA SEGASATURN ", 16)) + return false; + + log_cb(RETRO_LOG_INFO, "This is a Saturn disc.\n"); + return true; +} + +static bool disk_set_eject_state( bool ejected ) +{ + if ( ejected == g_eject_state ) + { + // no change + return false; + } + else + { + // store + g_eject_state = ejected; + + if ( ejected ) + { + // open disc tray + CDB_SetDisc( true, NULL ); + } + else + { + // close the tray - with a disc inside + if ( g_current_disc < CDInterfaces.size() ) { + CDB_SetDisc( false, CDInterfaces[g_current_disc] ); + } else { + CDB_SetDisc( false, NULL ); + } + } + + return true; + } +} + +static bool disk_get_eject_state(void) +{ + return g_eject_state; +} + +static bool disk_set_image_index(unsigned index) +{ + // only listen if the tray is open + if ( g_eject_state == true ) + { + if ( index < CDInterfaces.size() ) { + // log_cb(RETRO_LOG_INFO, "Selected disc %d of %d.\n", index+1, CDInterfaces.size() ); + g_current_disc = index; + return true; + } + } + + return false; +} + +static unsigned disk_get_num_images(void) +{ + return CDInterfaces.size(); +} + +/* +#if 0 +// Mednafen really doesn't support adding disk images on the fly ... +// Hack around this. +static void update_md5_checksum(CDIF *iface) +{ + uint8 LayoutMD5[16]; + md5_context layout_md5; + CD_TOC toc; + + md5_starts(&layout_md5); + + TOC_Clear(&toc); + + iface->ReadTOC(&toc); + + md5_update_u32_as_lsb(&layout_md5, toc.first_track); + md5_update_u32_as_lsb(&layout_md5, toc.last_track); + md5_update_u32_as_lsb(&layout_md5, toc.tracks[100].lba); + + for (uint32 track = toc.first_track; track <= toc.last_track; track++) + { + md5_update_u32_as_lsb(&layout_md5, toc.tracks[track].lba); + md5_update_u32_as_lsb(&layout_md5, toc.tracks[track].control & 0x4); + } + + md5_finish(&layout_md5, LayoutMD5); + memcpy(MDFNGameInfo->MD5, LayoutMD5, 16); + + char *md5 = md5_asciistr(MDFNGameInfo->MD5); + log_cb(RETRO_LOG_INFO, "[Mednafen]: Updated md5 checksum: %s.\n", md5); +} +#endif +*/ +static bool disk_replace_image_index(unsigned index, const struct retro_game_info *info) +{ + log_cb(RETRO_LOG_INFO, "disk_replace_image_index(%d,*info) called.\n", index); + return false; + + // todo - untested + +#if 0 + if (index >= disk_get_num_images() || !g_eject_state) + return false; + + if (!info) + { + delete CDInterfaces.at(index); + CDInterfaces.erase(CDInterfaces.begin() + index); + if (index < CD_SelectedDisc) + CD_SelectedDisc--; + + disk_image_paths.erase(disk_image_paths.begin() + index); + disk_image_labels.erase(disk_image_labels.begin() + index); + + // Poke into psx.cpp + CalcDiscSCEx(); + return true; + } + + bool success = true; + CDIF *iface = CDIF_Open(&success, info->path, false, false); + + if (!success) + return false; + + delete CDInterfaces.at(index); + CDInterfaces.at(index) = iface; + CalcDiscSCEx(); + + /* If we replace, we want the "swap disk manually effect". */ + extract_basename(retro_cd_base_name, info->path, sizeof(retro_cd_base_name)); + /* Ugly, but needed to get proper disk swapping effect. */ + update_md5_checksum(iface); + + /* Update disk path/label vectors */ + disk_image_paths[index] = info->path; + disk_image_labels[index] = retro_cd_base_name; + + return true; +#endif +} + +static bool disk_add_image_index(void) +{ + log_cb(RETRO_LOG_INFO, "disk_add_image_index called.\n"); + + CDInterfaces.push_back(NULL); + disk_image_paths.push_back(""); + disk_image_labels.push_back(""); + return true; +} + +static bool disk_set_initial_image(unsigned index, const char *path) +{ + if (string_is_empty(path)) + return false; + + g_initial_disc = index; + g_initial_disc_path = path; + + return true; +} + +static bool disk_get_image_path(unsigned index, char *path, size_t len) +{ + if (len < 1) + return false; + + if ((index < CDInterfaces.size()) && + (index < disk_image_paths.size())) + { + if (!string_is_empty(disk_image_paths[index].c_str())) + { + strlcpy(path, disk_image_paths[index].c_str(), len); + return true; + } + } + + return false; +} + +static bool disk_get_image_label(unsigned index, char *label, size_t len) +{ + if (len < 1) + return false; + + if ((index < CDInterfaces.size()) && + (index < disk_image_labels.size())) + { + if (!string_is_empty(disk_image_labels[index].c_str())) + { + strlcpy(label, disk_image_labels[index].c_str(), len); + return true; + } + } + + return false; +} + +//------------------------------------------------------------------------------ +// Global Functions +//------------------------------------------------------------------------------ + +/* This has to be 'global', since we need to + * access the current disk index inside + * libretro.cpp */ +unsigned disk_get_image_index(void) +{ + return g_current_disc; +} + +static struct retro_disk_control_callback disk_interface = +{ + disk_set_eject_state, + disk_get_eject_state, + disk_get_image_index, + disk_set_image_index, + disk_get_num_images, + disk_replace_image_index, + disk_add_image_index, +}; + +static struct retro_disk_control_ext_callback disk_interface_ext = +{ + disk_set_eject_state, + disk_get_eject_state, + disk_get_image_index, + disk_set_image_index, + disk_get_num_images, + disk_replace_image_index, + disk_add_image_index, + disk_set_initial_image, + disk_get_image_path, + disk_get_image_label, +}; + +void extract_basename(char *buf, const char *path, size_t size) +{ + const char *base = strrchr(path, '/'); + if (!base) + base = strrchr(path, '\\'); + if (!base) + base = path; + + if (*base == '\\' || *base == '/') + base++; + + strncpy(buf, base, size - 1); + buf[size - 1] = '\0'; + + char *ext = strrchr(buf, '.'); + if (ext) + *ext = '\0'; +} + +void extract_directory(char *buf, const char *path, size_t size) +{ + strncpy(buf, path, size - 1); + buf[size - 1] = '\0'; + + char *base = strrchr(buf, '/'); + if (!base) + base = strrchr(buf, '\\'); + + if (base) + *base = '\0'; + else + buf[0] = '\0'; +} + +void disc_init( retro_environment_t environ_cb ) +{ + unsigned dci_version = 0; + + // start closed + g_eject_state = false; + + g_initial_disc = 0; + g_initial_disc_path.clear(); + + // register vtable with environment + if (environ_cb(RETRO_ENVIRONMENT_GET_DISK_CONTROL_INTERFACE_VERSION, &dci_version) && (dci_version >= 1)) + environ_cb(RETRO_ENVIRONMENT_SET_DISK_CONTROL_EXT_INTERFACE, &disk_interface_ext); + else + environ_cb(RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE, &disk_interface); +} + +static INLINE bool MDFN_isspace(const char c) { return c == ' ' || c == '\f' || c == '\r' || c == '\n' || c == '\t' || c == '\v'; } + +// Remove whitespace from beginning of s +static void MDFN_ltrim(char* s) +{ + const char* si = s; + char* di = s; + bool InWhitespace = true; + + while(*si) + { + if(!InWhitespace || !MDFN_isspace(*si)) + { + InWhitespace = false; + *di = *si; + di++; + } + si++; + } + + *di = 0; +} + +// Remove whitespace from end of s +static void MDFN_rtrim(char* s) +{ + const size_t len = strlen(s); + + if(!len) + return; + // + size_t x = len; + + do + { + x--; + + if(!MDFN_isspace(s[x])) + break; + + s[x] = 0; + } while(x); +} + +static void MDFN_trim(char* s) +{ + MDFN_rtrim(s); + MDFN_ltrim(s); +} + +static void MDFN_zapctrlchars(char* s) +{ + if(!s) + return; + + while(*s) + { + if((unsigned char)*s < 0x20) + *s = ' '; + + s++; + } +} + +static void CalcGameID( uint8* id_out16, uint8* fd_id_out16, char* sgid, char* sgname, char* sgarea ) +{ + md5_context mctx; + uint8_t buf[2048]; + + log_cb(RETRO_LOG_INFO, "Calculating game ID (%d discs)\n", CDInterfaces.size() ); + + mctx.starts(); + + for(size_t x = 0; x < CDInterfaces.size(); x++) + { + CDIF *c = CDInterfaces[x]; + TOC toc; + + c->ReadTOC(&toc); + + mctx.update_u32_as_lsb(toc.first_track); + mctx.update_u32_as_lsb(toc.last_track); + mctx.update_u32_as_lsb(toc.disc_type); + + for(unsigned i = 1; i <= 100; i++) + { + const auto& t = toc.tracks[i]; + + mctx.update_u32_as_lsb(t.adr); + mctx.update_u32_as_lsb(t.control); + mctx.update_u32_as_lsb(t.lba); + mctx.update_u32_as_lsb(t.valid); + } + + for(unsigned i = 0; i < 512; i++) + { + if(c->ReadSector((uint8_t*)&buf[0], i, 1) >= 0x1) + { + if(i == 0) + { + char* tmp; + memcpy(sgid, (void*)(&buf[0x20]), 16); + sgid[16] = 0; + if((tmp = strrchr(sgid, 'V'))) + { + do + { + *tmp = 0; + } while(tmp-- != sgid && (signed char)*tmp <= 0x20); + memcpy(sgname, &buf[0x60], 0x70); + sgname[0x70] = 0; + MDFN_zapctrlchars(sgname); + MDFN_trim(sgname); + + memcpy(sgarea, &buf[0x40], 0x10); + sgarea[0x10] = 0; + MDFN_zapctrlchars(sgarea); + MDFN_trim(sgarea); + } + } + + mctx.update(&buf[0], 2048); + } + } + + if(x == 0) + { + md5_context fd_mctx = mctx; + fd_mctx.finish(fd_id_out16); + } + } + + mctx.finish(id_out16); +} + +void disc_cleanup(void) +{ + for(unsigned i = 0; i < CDInterfaces.size(); i++) { + delete CDInterfaces[i]; + } + CDInterfaces.clear(); + + disk_image_paths.clear(); + disk_image_labels.clear(); + + g_current_disc = 0; +} + +bool DetectRegion( unsigned* region ) +{ + uint8_t *buf = new uint8[2048 * 16]; + uint64 possible_regions = 0; + + for(auto& c : CDInterfaces) + { + if(c->ReadSector(&buf[0], 0, 16) != 0x1) + continue; + + if(!IsSaturnDisc(&buf[0])) + continue; + + for(unsigned i = 0; i < 16; i++) + { + for(auto const& rs : region_strings) + { + if(rs.c == buf[0x40 + i]) + { + possible_regions |= (uint64)1 << rs.region; + break; + } + } + } + + break; + } + + delete[] buf; + + for(auto const& rs : region_strings) + { + if(possible_regions & ((uint64)1 << rs.region)) + { + log_cb(RETRO_LOG_INFO, "Disc Region: \"%s\"\n", rs.str ); + *region = rs.region; + return true; + } + } + + return false; +} + +bool DiscSanityChecks(void) +{ + size_t i; + + // For each disc + for( i = 0; i < CDInterfaces.size(); i++ ) + { + TOC toc; + + CDInterfaces[i]->ReadTOC(&toc); + + // For each track + for( int32 track = 1; track <= 99; track++) + { + if(!toc.tracks[track].valid) + continue; + + if(toc.tracks[track].control & SUBQ_CTRLF_DATA) + continue; + + // + // + // + + const int32 start_lba = toc.tracks[track].lba; + const int32 end_lba = start_lba + 32 - 1; + bool any_subq_curpos = false; + + for(int32 lba = start_lba; lba <= end_lba; lba++) + { + uint8 pwbuf[96]; + uint8 qbuf[12]; + + if(!CDInterfaces[i]->ReadRawSectorPWOnly(pwbuf, lba, false)) + { + log_cb(RETRO_LOG_ERROR, + "Testing Disc %zu of %zu: Error reading sector at LBA %d.\n", + i + 1, CDInterfaces.size(), lba ); + return false; + } + + subq_deinterleave(pwbuf, qbuf); + if(subq_check_checksum(qbuf) && (qbuf[0] & 0xF) == ADR_CURPOS) + { + const uint8 qm = qbuf[7]; + const uint8 qs = qbuf[8]; + const uint8 qf = qbuf[9]; + uint8 lm, ls, lf; + + any_subq_curpos = true; + + LBA_to_AMSF(lba, &lm, &ls, &lf); + lm = U8_to_BCD(lm); + ls = U8_to_BCD(ls); + lf = U8_to_BCD(lf); + + if(lm != qm || ls != qs || lf != qf) + { + log_cb(RETRO_LOG_ERROR, + "Testing Disc %zu of %zu: Time mismatch at LBA=%d(%02x:%02x:%02x); Q subchannel: %02x:%02x:%02x\n", + i + 1, CDInterfaces.size(), + lba, + lm, ls, lf, + qm, qs, qf); + + return false; + } + } + } + + if(!any_subq_curpos) + { + log_cb(RETRO_LOG_ERROR, + "Testing Disc %zu of %zu: No valid Q subchannel ADR_CURPOS data present at LBA %d-%d?!\n", + i + 1, CDInterfaces.size(), + start_lba, end_lba ); + return false; + } + + break; + + } // for each track + + } // for each disc + + return true; +} + +void disc_select( unsigned disc_num ) +{ + if ( disc_num < CDInterfaces.size() ) { + g_current_disc = disc_num; + CDB_SetDisc( false, CDInterfaces[ g_current_disc ] ); + } +} + +bool disc_load_content( MDFNGI* game_interface, const char* content_name, uint8* fd_id, char* sgid, char* sgname, char* sgarea, bool image_memcache ) +{ + disc_cleanup(); + + if ( !content_name ) + return false; + + uint8 LayoutMD5[ 16 ]; + + log_cb( RETRO_LOG_INFO, "Loading \"%s\"\n", content_name ); + + try + { + size_t content_name_len = strlen( content_name ); + if ( content_name_len > 4 ) + { + const char* content_ext = content_name + content_name_len - 4; + if ( !strcasecmp( content_ext, ".m3u" ) ) + { + // multiple discs + ReadM3U(disk_image_paths, content_name); + for(unsigned i = 0; i < disk_image_paths.size(); i++) + { + char image_label[4096]; + bool success = true; + image_label[0] = '\0'; + log_cb(RETRO_LOG_INFO, "Adding CD: \"%s\".\n", disk_image_paths[i].c_str()); + CDIF *image = CDIF_Open(disk_image_paths[i].c_str(), image_memcache); + CDInterfaces.push_back(image); + extract_basename( + image_label, + disk_image_paths[i].c_str(), + sizeof(image_label)); + disk_image_labels.push_back(image_label); + } + } + else + { + // single disc + char image_label[4096]; + bool success = true; + + image_label[0] = '\0'; + + disk_image_paths.push_back(content_name); + CDIF *image = CDIF_Open(content_name, image_memcache); + CDInterfaces.push_back(image); + + extract_basename( + image_label, + content_name, + sizeof(image_label)); + disk_image_labels.push_back(image_label); + } + + /* Attempt to set initial disk index */ + if ((g_initial_disc > 0) && + (g_initial_disc + < CDInterfaces.size())) + if (g_initial_disc + < disk_image_paths.size()) + if (string_is_equal( + disk_image_paths[ + g_initial_disc].c_str(), + g_initial_disc_path.c_str())) + g_current_disc = (int) + g_initial_disc; + } + } + catch( std::exception &e ) + { + log_cb(RETRO_LOG_ERROR, "Loading Failed.\n"); + return false; + } + + // Print out a track list for all discs. + for(unsigned i = 0; i < CDInterfaces.size(); i++) + { + TOC toc; + CDInterfaces[i]->ReadTOC(&toc); + log_cb(RETRO_LOG_DEBUG, "Disc %d\n", i + 1); + for(int32 track = toc.first_track; track <= toc.last_track; track++) { + log_cb(RETRO_LOG_DEBUG, "- Track %2d, LBA: %6d %s\n", track, toc.tracks[track].lba, (toc.tracks[track].control & 0x4) ? "DATA" : "AUDIO"); + } + log_cb(RETRO_LOG_DEBUG, "Leadout: %6d\n", toc.tracks[100].lba); + } + + log_cb(RETRO_LOG_DEBUG, "Calculating layout MD5.\n"); + // Calculate layout MD5. The system emulation LoadCD() code is free to ignore this value and calculate + // its own, or to use it to look up a game in its database. + { + md5_context layout_md5; + layout_md5.starts(); + + for( unsigned i = 0; i < CDInterfaces.size(); i++ ) + { + TOC toc; + + CDInterfaces[i]->ReadTOC(&toc); + + layout_md5.update_u32_as_lsb(toc.first_track); + layout_md5.update_u32_as_lsb(toc.last_track); + layout_md5.update_u32_as_lsb(toc.tracks[100].lba); + + for(uint32 track = toc.first_track; track <= toc.last_track; track++) + { + layout_md5.update_u32_as_lsb(toc.tracks[track].lba); + layout_md5.update_u32_as_lsb(toc.tracks[track].control & 0x4); + } + } + + layout_md5.finish(LayoutMD5); + } + log_cb(RETRO_LOG_DEBUG, "Done calculating layout MD5.\n"); + // TODO: include module name in hash + + memcpy( game_interface->MD5, LayoutMD5, 16 ); + + CalcGameID( game_interface->MD5, fd_id, sgid, sgname, sgarea ); + + return true; +} + +//============================================================================== diff --git a/cores/saturn/disc.h b/cores/saturn/disc.h new file mode 100644 index 000000000..3676f0e03 --- /dev/null +++ b/cores/saturn/disc.h @@ -0,0 +1,27 @@ +#ifndef __DISC_H__ +#define __DISC_H__ + +#include +#include "mednafen/git.h" +#include "mednafen/mednafen-types.h" + +extern void extract_basename(char *buf, const char *path, size_t size); +extern void extract_directory(char *buf, const char *path, size_t size); + +// These routines handle disc drive front-end. + +extern unsigned disk_get_image_index(void); + +void disc_init( retro_environment_t environ_cb ); + +void disc_cleanup(void); + +bool DetectRegion( unsigned* region ); + +bool DiscSanityChecks(void); + +void disc_select( unsigned disc_num ); + +bool disc_load_content( MDFNGI* game_inteface, const char *name, uint8* fd_id, char* sgid, char *sgname, char *sgarea, bool image_memcache ); + +#endif diff --git a/cores/saturn/input.cpp b/cores/saturn/input.cpp new file mode 100644 index 000000000..43b68557e --- /dev/null +++ b/cores/saturn/input.cpp @@ -0,0 +1,1748 @@ + +#include "libretro.h" +#include "libretro_settings.h" +#include "mednafen/mednafen-types.h" +#include "mednafen/ss/ss.h" +#include "mednafen/ss/smpc.h" +#include "mednafen/state.h" +#include +#include + +//------------------------------------------------------------------------------ +// Locals +//------------------------------------------------------------------------------ + +#define MAX_CONTROLLERS 12 /* 2x 6 player adaptors */ + +static retro_environment_t environ_cb; /* cached during input_init_env */ + +static unsigned players = 2; + +static int astick_deadzone = 0; +static int trigger_deadzone = 0; +static const int TRIGGER_MAX = 0xFFFF; +static float mouse_sensitivity = 1.0f; + +static unsigned geometry_width = 0; +static unsigned geometry_height = 0; + +static int pointer_pressed = 0; +static const int POINTER_PRESSED_CYCLES = 4; +static int pointer_cycles_after_released = 0; +static int pointer_pressed_last_x = 0; +static int pointer_pressed_last_y = 0; + +typedef union +{ + uint8_t u8[ 32 ]; + uint16_t gun_pos[ 2 ]; + uint16_t buttons; +} INPUT_DATA; + +// Controller state buffer (per player) +static INPUT_DATA input_data[ MAX_CONTROLLERS ] = {0}; + +// Controller type (per player) +static uint32_t input_type[ MAX_CONTROLLERS ] = {0}; + + +#define INPUT_MODE_3D_PAD_ANALOG ( 1 << 0 ) // Set means analog mode. +#define INPUT_MODE_3D_PAD_PREVIOUS_MASK ( 1 << 1 ) // Edge trigger helper. +#define INPUT_MODE_MISSION_THROTTLE_LATCH ( 1 << 2 ) // Latch throttle enabled? +#define INPUT_MODE_MISSION_THROTTLE_PREV ( 1 << 3 ) // Edge trigger helper. + +#define INPUT_MODE_DEFAULT 0 +#define INPUT_MODE_DEFAULT_3D_PAD INPUT_MODE_3D_PAD_ANALOG + +// Mode switch for 3D Control Pad (per player) +static uint16_t input_mode[ MAX_CONTROLLERS ] = {0}; +static int16_t input_throttle_latch[ MAX_CONTROLLERS ] = {0}; + +//------------------------------------------------------------------------------ +// Supported Devices +//------------------------------------------------------------------------------ + +#define RETRO_DEVICE_SS_PAD RETRO_DEVICE_SUBCLASS( RETRO_DEVICE_JOYPAD, 0 ) +#define RETRO_DEVICE_SS_3D_PAD RETRO_DEVICE_SUBCLASS( RETRO_DEVICE_ANALOG, 0 ) +#define RETRO_DEVICE_SS_WHEEL RETRO_DEVICE_SUBCLASS( RETRO_DEVICE_ANALOG, 1 ) +#define RETRO_DEVICE_SS_MISSION RETRO_DEVICE_SUBCLASS( RETRO_DEVICE_ANALOG, 2 ) +#define RETRO_DEVICE_SS_MISSION2 RETRO_DEVICE_SUBCLASS( RETRO_DEVICE_ANALOG, 3 ) +#define RETRO_DEVICE_SS_MOUSE RETRO_DEVICE_SUBCLASS( RETRO_DEVICE_MOUSE, 0 ) +#define RETRO_DEVICE_SS_TWINSTICK RETRO_DEVICE_SUBCLASS( RETRO_DEVICE_ANALOG, 4 ) +#define RETRO_DEVICE_SS_GUN_JP RETRO_DEVICE_SUBCLASS( RETRO_DEVICE_LIGHTGUN, 0 ) +#define RETRO_DEVICE_SS_GUN_US RETRO_DEVICE_SUBCLASS( RETRO_DEVICE_LIGHTGUN, 1 ) + +enum { INPUT_DEVICE_TYPES_COUNT = 1 /*none*/ + 9 }; /* <-- update me! */ + +static const struct retro_controller_description input_device_types[ INPUT_DEVICE_TYPES_COUNT ] = +{ + { "Control Pad", RETRO_DEVICE_JOYPAD }, + { "3D Control Pad", RETRO_DEVICE_SS_3D_PAD }, + { "Arcade Racer", RETRO_DEVICE_SS_WHEEL }, + { "Mission Stick", RETRO_DEVICE_SS_MISSION }, + { "Mouse", RETRO_DEVICE_SS_MOUSE }, + { "Stunner", RETRO_DEVICE_SS_GUN_US }, + { "Twin-Stick", RETRO_DEVICE_SS_TWINSTICK }, + { "Virtua Gun", RETRO_DEVICE_SS_GUN_JP }, + { "Dual Mission Sticks", RETRO_DEVICE_SS_MISSION2 }, /*"Panzer Dragoon Zwei" only!*/ + { NULL, 0 }, +}; + +static const struct retro_controller_info ports_no_6player[ 1 + 1 + 1 ] = +{ + /* port one */ + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + + /* port two */ + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + + { 0 }, +}; + +static const struct retro_controller_info ports_left_6player[ 6 + 1 + 1 ] = +{ + /* port one */ + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + + /* port two: 6player adaptor */ + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + + { 0 }, +}; + +static const struct retro_controller_info ports_right_6player[ 1 + 6 + 1 ] = +{ + /* port one */ + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + + /* port two: 6player adaptor */ + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + + { 0 }, +}; + +static const struct retro_controller_info ports_two_6player[ 6 + 6 + 1 ] = +{ + /* port one: 6player adaptor */ + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + + /* port two: 6player adaptor */ + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + { input_device_types, INPUT_DEVICE_TYPES_COUNT }, + + { 0 }, +}; + +/* Lookup table to select ports info for all combinations + of 6player adaptor connection. */ +static const struct retro_controller_info* ports_lut[ 4 ] = +{ + ports_no_6player, + ports_left_6player, + ports_right_6player, + ports_two_6player +}; + + +//------------------------------------------------------------------------------ +// Mapping Helpers +//------------------------------------------------------------------------------ + +/* Control Pad (default) */ +enum { INPUT_MAP_PAD_SIZE = 12 }; +static const unsigned input_map_pad[ INPUT_MAP_PAD_SIZE ] = +{ + // libretro input at position || maps to Saturn on bit + //----------------------------------------------------------------------------- + RETRO_DEVICE_ID_JOYPAD_L, // L1 -> Z 0 + RETRO_DEVICE_ID_JOYPAD_X, // X(top) -> Y 1 + RETRO_DEVICE_ID_JOYPAD_Y, // Y(left) -> X 2 + RETRO_DEVICE_ID_JOYPAD_R2, // R2 -> R 3 + RETRO_DEVICE_ID_JOYPAD_UP, // Pad-Up -> Pad-Up 4 + RETRO_DEVICE_ID_JOYPAD_DOWN, // Pad-Down -> Pad-Down 5 + RETRO_DEVICE_ID_JOYPAD_LEFT, // Pad-Left -> Pad-Left 6 + RETRO_DEVICE_ID_JOYPAD_RIGHT, // Pad-Right -> Pad-Right 7 + RETRO_DEVICE_ID_JOYPAD_A, // A(right) -> B 8 + RETRO_DEVICE_ID_JOYPAD_R, // R1 -> C 9 + RETRO_DEVICE_ID_JOYPAD_B, // B(down) -> A 10 + RETRO_DEVICE_ID_JOYPAD_START, // Start -> Start 11 +}; + +static const unsigned input_map_pad_left_shoulder = + RETRO_DEVICE_ID_JOYPAD_L2; // L2 -> L 15 + +/* 3D Control Pad */ +enum { INPUT_MAP_3D_PAD_SIZE = 11 }; +static const unsigned input_map_3d_pad[ INPUT_MAP_3D_PAD_SIZE ] = +{ + // libretro input at position || maps to Saturn on bit + //----------------------------------------------------------------------------- + RETRO_DEVICE_ID_JOYPAD_UP, // Pad-Up -> Pad-Up 0 + RETRO_DEVICE_ID_JOYPAD_DOWN, // Pad-Down -> Pad-Down 1 + RETRO_DEVICE_ID_JOYPAD_LEFT, // Pad-Left -> Pad-Left 2 + RETRO_DEVICE_ID_JOYPAD_RIGHT, // Pad-Right -> Pad-Right 3 + RETRO_DEVICE_ID_JOYPAD_A, // A(right) -> B 4 + RETRO_DEVICE_ID_JOYPAD_R, // R1 -> C 5 + RETRO_DEVICE_ID_JOYPAD_B, // B(down) -> A 6 + RETRO_DEVICE_ID_JOYPAD_START, // Start -> Start 7 + RETRO_DEVICE_ID_JOYPAD_L, // L1 -> Z 8 + RETRO_DEVICE_ID_JOYPAD_X, // X(top) -> Y 9 + RETRO_DEVICE_ID_JOYPAD_Y, // Y(left) -> X 10 +}; + +static const unsigned input_map_3d_pad_mode_switch = RETRO_DEVICE_ID_JOYPAD_SELECT; + +/* Arcade Racer (wheel) */ +enum { INPUT_MAP_WHEEL_BITSHIFT = 4 }; +enum { INPUT_MAP_WHEEL_SIZE = 7 }; +static const unsigned input_map_wheel[ INPUT_MAP_WHEEL_SIZE ] = +{ + // libretro input at position || maps to Saturn on bit + //----------------------------------------------------------------------------- + RETRO_DEVICE_ID_JOYPAD_A, // A(right) -> B 4 + RETRO_DEVICE_ID_JOYPAD_R, // R1 -> C 5 + RETRO_DEVICE_ID_JOYPAD_B, // B(down) -> A 6 + RETRO_DEVICE_ID_JOYPAD_START, // Start -> Start 7 + RETRO_DEVICE_ID_JOYPAD_L, // L1 -> Z 8 + RETRO_DEVICE_ID_JOYPAD_X, // X(top) -> Y 9 + RETRO_DEVICE_ID_JOYPAD_Y, // Y(left) -> X 10 +}; + +static const unsigned input_map_wheel_shift_left = RETRO_DEVICE_ID_JOYPAD_L2; +static const unsigned input_map_wheel_shift_right = RETRO_DEVICE_ID_JOYPAD_R2; + +/* Mission Stick */ +enum { INPUT_MAP_MISSION_SIZE = 8 }; +static const unsigned input_map_mission[ INPUT_MAP_MISSION_SIZE ] = +{ + // libretro input at position || maps to Saturn on bit + //----------------------------------------------------------------------------- + RETRO_DEVICE_ID_JOYPAD_A, // A(right) -> B 0 + RETRO_DEVICE_ID_JOYPAD_R, // R1 -> C 1 + RETRO_DEVICE_ID_JOYPAD_B, // B(down) -> A 2 + RETRO_DEVICE_ID_JOYPAD_START, // Start -> Start 3 + RETRO_DEVICE_ID_JOYPAD_L, // L1 -> Z 4 + RETRO_DEVICE_ID_JOYPAD_X, // X(top) -> Y 5 + RETRO_DEVICE_ID_JOYPAD_Y, // Y(left) -> X 6 + RETRO_DEVICE_ID_JOYPAD_R2, // R2 -> R 7 +}; + +static const unsigned input_map_mission_left_shoulder = + RETRO_DEVICE_ID_JOYPAD_L2; // L2 -> L 15 + +static const unsigned input_map_mission_throttle_latch = RETRO_DEVICE_ID_JOYPAD_R3; + +/* Twin-Stick */ +static const unsigned input_map_twinstick_left_trigger = RETRO_DEVICE_ID_JOYPAD_L2; +static const unsigned input_map_twinstick_left_button = RETRO_DEVICE_ID_JOYPAD_L; +static const unsigned input_map_twinstick_right_trigger = RETRO_DEVICE_ID_JOYPAD_R2; +static const unsigned input_map_twinstick_right_button = RETRO_DEVICE_ID_JOYPAD_R; + +//------------------------------------------------------------------------------ +// Local Functions +//------------------------------------------------------------------------------ + +static void get_analog_axis( retro_input_state_t input_state_cb, + int player_index, + int stick, + int axis, + int* p_analog ) +{ + int analog = input_state_cb( player_index, RETRO_DEVICE_ANALOG, stick, axis ); + + // Analog stick deadzone + if ( astick_deadzone > 0 ) + { + static const int ASTICK_MAX = 0x8000; + const float scale = ((float)ASTICK_MAX/(float)(ASTICK_MAX - astick_deadzone)); + + if ( analog < -astick_deadzone ) + { + // Re-scale analog stick range + float scaled = (-analog - astick_deadzone)*scale; + analog = (int)round(-scaled); + if (analog < -32767) + analog = -32767; + } + else if ( analog > astick_deadzone ) + { + // Re-scale analog stick range + float scaled = (analog - astick_deadzone)*scale; + analog = (int)round(scaled); + if (analog > +32767) + analog = +32767; + } + else + analog = 0; + } + + // output + *p_analog = analog; +} + +static void get_analog_stick( retro_input_state_t input_state_cb, + int player_index, + int stick, + int* p_analog_x, + int* p_analog_y ) +{ + int analog_x = input_state_cb( player_index, RETRO_DEVICE_ANALOG, stick, RETRO_DEVICE_ID_ANALOG_X ); + int analog_y = input_state_cb( player_index, RETRO_DEVICE_ANALOG, stick, RETRO_DEVICE_ID_ANALOG_Y ); + + // Analog stick deadzone (borrowed code from parallel-n64 core) + if ( astick_deadzone > 0 ) + { + static const int ASTICK_MAX = 0x8000; + + // Convert cartesian coordinate analog stick to polar coordinates + double radius = sqrt(analog_x * analog_x + analog_y * analog_y); + double angle = atan2(analog_y, analog_x); + + if (radius > astick_deadzone) + { + // Re-scale analog stick range to negate deadzone (makes slow movements possible) + radius = (radius - astick_deadzone)*((float)ASTICK_MAX/(ASTICK_MAX - astick_deadzone)); + + // Convert back to cartesian coordinates + analog_x = (int)round(radius * cos(angle)); + analog_y = (int)round(radius * sin(angle)); + + // Clamp to correct range + if (analog_x > +32767) analog_x = +32767; + if (analog_x < -32767) analog_x = -32767; + if (analog_y > +32767) analog_y = +32767; + if (analog_y < -32767) analog_y = -32767; + } + else + { + analog_x = 0; + analog_y = 0; + } + } + + // output + *p_analog_x = analog_x; + *p_analog_y = analog_y; +} + +static uint32_t apply_trigger_deadzone( uint32_t input ) +{ + // Scale by two and apply outer deadzone (about 1%) + input = ( input * 66191 ) / 32768; + + // Inner deadzone + if ( trigger_deadzone > 0 ) + { + const float scale = ((float)TRIGGER_MAX/(float)(TRIGGER_MAX - trigger_deadzone)); + + if ( input > trigger_deadzone ) + { + // Re-scale analog range + float scaled = (input - trigger_deadzone)*scale; + input = (int)round(scaled); + } + else + input = 0; + } + + // Clamp + if (input > TRIGGER_MAX) + input = TRIGGER_MAX; + + return input; +} + +static uint16_t get_analog_trigger( retro_input_state_t input_state_cb, + int player_index, + int id ) +{ + // NOTE: Analog triggers were added Nov 2017. Not all front-ends support this + // feature (or pre-date it) so we need to handle this in a graceful way. + + // First, try and get an analog value using the new libretro API constant + uint16_t trigger = input_state_cb( player_index, + RETRO_DEVICE_ANALOG, + RETRO_DEVICE_INDEX_ANALOG_BUTTON, + id ); + + if ( trigger == 0 ) + { + // If we got exactly zero, we're either not pressing the button, or the front-end + // is not reporting analog values. We need to do a second check using the classic + // digital API method, to at least get some response - better than nothing. + + // NOTE: If we're really just not holding the trigger, we're still going to get zero. + if (input_state_cb( player_index, + RETRO_DEVICE_JOYPAD, + 0, + id )) + return 0xFFFF; + return 0; + } + + // We got something, which means the front-end can handle analog buttons. + // So we apply a deadzone to the input and use it. + // Mednafen wants 0 - 65535 so we scale up from 0 - 32767 + return apply_trigger_deadzone( (unsigned)trigger ); +} + + +//------------------------------------------------------------------------------ +// Global Functions +//------------------------------------------------------------------------------ + +void input_init_env( retro_environment_t _environ_cb ) +{ + // Cache this + environ_cb = _environ_cb; + +#define RETRO_DESCRIPTOR_BLOCK( _user ) \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "D-Pad Up" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "D-Pad Down" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "D-Pad Left" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "A Button" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "B Button" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "C Button" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "X Button" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "Y Button" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "Z Button" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L2, "L Button" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R2, "R Button" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start Button" }, \ + { _user, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Mode Switch" }, \ + { _user, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Analog X" }, \ + { _user, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Analog Y" }, \ + { _user, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_X, "Analog X (Right)" }, \ + { _user, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_RIGHT, RETRO_DEVICE_ID_ANALOG_Y, "Analog Y (Right)" }, \ + { _user, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_TRIGGER, "Gun Trigger" }, \ + { _user, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_START, "Gun Start" }, \ + { _user, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_RELOAD, "Gun Reload" } + + struct retro_input_descriptor desc[] = + { + RETRO_DESCRIPTOR_BLOCK( 0 ), + RETRO_DESCRIPTOR_BLOCK( 1 ), + RETRO_DESCRIPTOR_BLOCK( 2 ), + RETRO_DESCRIPTOR_BLOCK( 3 ), + RETRO_DESCRIPTOR_BLOCK( 4 ), + RETRO_DESCRIPTOR_BLOCK( 5 ), + RETRO_DESCRIPTOR_BLOCK( 6 ), + RETRO_DESCRIPTOR_BLOCK( 7 ), + RETRO_DESCRIPTOR_BLOCK( 8 ), + RETRO_DESCRIPTOR_BLOCK( 9 ), + RETRO_DESCRIPTOR_BLOCK( 10 ), + RETRO_DESCRIPTOR_BLOCK( 11 ), + + { 0 }, + }; + +#undef RETRO_DESCRIPTOR_BLOCK + + /* Send to front-end */ + environ_cb( RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, desc ); +} + +void input_set_env( retro_environment_t environ_cb ) +{ + /* Pick ports selection */ + const struct retro_controller_info* ports = ports_lut[ setting_multitap_port1 + setting_multitap_port2 * 2 ]; + + /* Send to front-end */ + environ_cb( RETRO_ENVIRONMENT_SET_CONTROLLER_INFO, (void*)ports ); +} + +void input_init(void) +{ + // Initialise to default pad type and bind input buffers to SMPC emulation. + for ( unsigned i = 0; i < MAX_CONTROLLERS; ++i ) + { + input_type[ i ] = RETRO_DEVICE_JOYPAD; + input_mode[ i ] = INPUT_MODE_DEFAULT; + input_throttle_latch[ i ] = 0; + + SMPC_SetInput( i, "gamepad", (uint8*)&input_data[ i ] ); + } +} + +void input_set_geometry( unsigned width, unsigned height ) +{ + log_cb( RETRO_LOG_INFO, "input_set_geometry: %dx%d\n", width, height ); + + geometry_width = width; + geometry_height = height; +} + +void input_set_deadzone_stick( int percent ) +{ + if ( percent >= 0 && percent <= 100 ) + astick_deadzone = (int)( percent * 0.01f * 0x8000); +} + +void input_set_deadzone_trigger( int percent ) +{ + if ( percent >= 0 && percent <= 100 ) + trigger_deadzone = (int)( percent * 0.01f * TRIGGER_MAX); +} + +void input_set_mouse_sensitivity( int percent ) +{ + if ( percent > 0 && percent <= 200 ) + mouse_sensitivity = (float)percent / 100.0f; +} + +void input_update_with_bitmasks( retro_input_state_t input_state_cb ) +{ + // For each player (logical controller) + for ( unsigned iplayer = 0; iplayer < players; ++iplayer ) + { + INPUT_DATA* p_input = &(input_data[ iplayer ]); + int16_t ret = input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_MASK ); + + // reset input + p_input->buttons = 0; + + // What kind of controller is connected? + switch ( input_type[ iplayer ] ) + { + + case RETRO_DEVICE_JOYPAD: + case RETRO_DEVICE_SS_PAD: + + // + // -- standard control pad buttons + d-pad + + // input_map_pad is configured to quickly map libretro buttons to the correct bits for the Saturn. + for ( int i = 0; i < INPUT_MAP_PAD_SIZE; ++i ) + { + if (ret & (1 << input_map_pad[ i ])) + p_input->buttons |= ( 1 << i ); + } + + // .. the left trigger on the Saturn is a special case since there's a gap in the bits. + if (ret & (1 << RETRO_DEVICE_ID_JOYPAD_L2)) + p_input->buttons |= ( 1 << 15 ); + + if (!opposite_directions) + { + if ((p_input->buttons & 0x30) == 0x30) + p_input->buttons &= ~0x30; + if ((p_input->buttons & 0xC0) == 0xC0) + p_input->buttons &= ~0xC0; + } + break; + + case RETRO_DEVICE_SS_TWINSTICK: + + { + int analog_lx, analog_ly; + int analog_rx, analog_ry; + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_LEFT, &analog_lx, &analog_ly ); + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_RIGHT, &analog_rx, &analog_ry ); + + const int thresh = 16000; + + // left-stick + if ( analog_ly <= -thresh ) + p_input->buttons |= ( 1 << 4 ); // Up + if ( analog_lx >= thresh ) + p_input->buttons |= ( 1 << 7 ); // Right + if ( analog_ly >= thresh ) + p_input->buttons |= ( 1 << 5 ); // Down + if ( analog_lx <= -thresh ) + p_input->buttons |= ( 1 << 6 ); // Left + + // right-stick + if ( analog_ry <= -thresh ) + p_input->buttons |= ( 1 << 1 ); // Up <-(Y) + if ( analog_rx >= thresh ) + p_input->buttons |= ( 1 << 0 ); // Right <-(Z) + if ( analog_ry >= thresh ) + p_input->buttons |= ( 1 << 8 ); // Down <-(B) + if ( analog_rx <= -thresh ) + p_input->buttons |= ( 1 << 2 ); // Left <-(X) + + // left trigger + if (ret & (1 << input_map_twinstick_left_trigger)) + p_input->buttons |= (1 << 15); + + // left button + if (ret & (1 << input_map_twinstick_left_button)) + p_input->buttons |= ( 1 << 3 ); + + // right trigger + if (ret & (1 << input_map_twinstick_right_trigger)) + p_input->buttons |= ( 1 << 10 ); + + // right button + if (ret & (1 << input_map_twinstick_right_button)) + p_input->buttons |= ( 1 << 9 ); + + // start + if (ret & (1 << RETRO_DEVICE_ID_JOYPAD_START)) + p_input->buttons |= ( 1 << 11 ); + } + break; + + case RETRO_DEVICE_SS_3D_PAD: + + { + // + // -- 3d control pad buttons + + // input_map_3d_pad is configured to quickly map libretro buttons to the correct bits for the Saturn. + for ( int i = 0; i < INPUT_MAP_3D_PAD_SIZE; ++i ) + if (ret & (1 << input_map_3d_pad[ i ])) + p_input->buttons |= ( 1 << i ); + + // + // -- analog stick + + int analog_x, analog_y; + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_LEFT, &analog_x, &analog_y ); + + // mednafen wants 0 - 32767 - 65535 + uint16_t thumb_x = static_cast< uint16_t >( analog_x + 32767 ); + uint16_t thumb_y = static_cast< uint16_t >( analog_y + 32767 ); + + // + // -- triggers + + // mednafen wants 0 - 65535 + uint16_t l_trigger = get_analog_trigger( input_state_cb, iplayer, RETRO_DEVICE_ID_JOYPAD_L2 ); + uint16_t r_trigger = get_analog_trigger( input_state_cb, iplayer, RETRO_DEVICE_ID_JOYPAD_R2 ); + + + // + // -- mode switch + + { + // Handle MODE button as a switch + uint16_t prev = ( input_mode[iplayer] & INPUT_MODE_3D_PAD_PREVIOUS_MASK ); + uint16_t held = 0; + + if (ret & (1 << input_map_3d_pad_mode_switch)) + held = INPUT_MODE_3D_PAD_PREVIOUS_MASK; + + // Rising edge trigger + if ( !prev && held ) + { + char text[ 256 ]; + // Toggle 'state' bit: analog/digital mode + input_mode[ iplayer ] ^= INPUT_MODE_3D_PAD_ANALOG; + + // Tell user + if ( input_mode[iplayer] & INPUT_MODE_3D_PAD_ANALOG ) + sprintf( text, "Controller %u: Analog Mode", (iplayer+1) ); + else + sprintf( text, "Controller %u: Digital Mode", (iplayer+1) ); + struct retro_message msg = { text, 180 }; + environ_cb( RETRO_ENVIRONMENT_SET_MESSAGE, &msg ); + } + + // Store held state in 'previous' bit. + input_mode[ iplayer ] = ( input_mode[ iplayer ] & ~INPUT_MODE_3D_PAD_PREVIOUS_MASK ) | held; + } + + // + // -- format input data + + // Apply analog/digital mode switch bit. + if ( input_mode[iplayer] & INPUT_MODE_3D_PAD_ANALOG ) + p_input->buttons |= 0x1000; // set bit 12 + + p_input->u8[0x2] = ((thumb_x >> 0) & 0xff); + p_input->u8[0x3] = ((thumb_x >> 8) & 0xff); + p_input->u8[0x4] = ((thumb_y >> 0) & 0xff); + p_input->u8[0x5] = ((thumb_y >> 8) & 0xff); + p_input->u8[0x6] = ((r_trigger >> 0) & 0xff); + p_input->u8[0x7] = ((r_trigger >> 8) & 0xff); + p_input->u8[0x8] = ((l_trigger >> 0) & 0xff); + p_input->u8[0x9] = ((l_trigger >> 8) & 0xff); + } + + break; + + case RETRO_DEVICE_SS_WHEEL: + + { + // + // -- Wheel buttons + + // input_map_wheel is configured to quickly map libretro buttons to the correct bits for the Saturn. + for ( int i = 0; i < INPUT_MAP_WHEEL_SIZE; ++i ) + { + const uint16_t bit = ( 1 << ( i + INPUT_MAP_WHEEL_BITSHIFT ) ); + if (ret & (1 << input_map_wheel[ i ])) + p_input->buttons |= bit; + } + + // shift-paddles + if (ret & (1 << input_map_wheel_shift_left)) + p_input->buttons |= ( 1 << 0 ); + if (ret & (1 << input_map_wheel_shift_right)) + p_input->buttons |= ( 1 << 1 ); + + // + // -- analog wheel + + int analog_x; + get_analog_axis( input_state_cb, iplayer, + RETRO_DEVICE_INDEX_ANALOG_LEFT, + RETRO_DEVICE_ID_ANALOG_X, &analog_x ); + + // + // -- format input data + + // Convert analog values into direction values. + uint16_t right = analog_x > 0 ? analog_x : 0; + uint16_t left = analog_x < 0 ? -analog_x : 0; + + p_input->u8[0x2] = ((left >> 0) & 0xff); + p_input->u8[0x3] = ((left >> 8) & 0xff); + p_input->u8[0x4] = ((right >> 0) & 0xff); + p_input->u8[0x5] = ((right >> 8) & 0xff); + } + + break; + + case RETRO_DEVICE_SS_MOUSE: + + { + // mouse buttons + p_input->u8[0x4] = 0; + + if ( input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT ) ) + p_input->u8[0x4] |= ( 1 << 0 ); // A + + if ( input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_RIGHT ) ) + p_input->u8[0x4] |= ( 1 << 1 ); // B + + if ( input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_MIDDLE ) ) + p_input->u8[0x4] |= ( 1 << 2 ); // C + + if ( input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START ) || + input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_BUTTON_4 ) || + input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_BUTTON_5 ) ) { + p_input->u8[0x4] |= ( 1 << 3 ); // Start + } + + // mouse input + int dx_raw = input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_X ); + int dy_raw = input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_Y ); + + int16_t *delta = (int16_t*)p_input; + delta[ 0 ] = (int16_t)roundf( dx_raw * mouse_sensitivity ); + delta[ 1 ] = (int16_t)roundf( dy_raw * mouse_sensitivity ); + } + + break; + + case RETRO_DEVICE_SS_MISSION: + + { + // + // -- mission stick buttons + + // input_map_mission is configured to quickly map libretro buttons to the correct bits for the Saturn. + for ( int i = 0; i < INPUT_MAP_MISSION_SIZE; ++i ) + if (ret & (1 << input_map_mission[ i ])) + p_input->buttons |= ( 1 << i ); + + // .. the left trigger is a special case, there's a gap in the bits. + if (ret & (1 << input_map_mission_left_shoulder)) + p_input->buttons |= ( 1 << 11 ); + + // + // -- analog stick + + int analog_x, analog_y; + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_LEFT, &analog_x, &analog_y ); + + // + // -- throttle + + int throttle_real; + get_analog_axis( input_state_cb, iplayer, + RETRO_DEVICE_INDEX_ANALOG_RIGHT, + RETRO_DEVICE_ID_ANALOG_Y, &throttle_real ); + + int16_t throttle; + if ( input_mode[iplayer] & INPUT_MODE_MISSION_THROTTLE_LATCH ) // Use latched value + throttle = input_throttle_latch[iplayer]; + else // Direct read + throttle = throttle_real; + + + // + // -- throttle latch switch + + { + // Handle MODE button as a switch + uint16_t prev = ( input_mode[iplayer] & INPUT_MODE_MISSION_THROTTLE_PREV ); + uint16_t held = 0; + + if (ret & (1 << input_map_mission_throttle_latch)) + held = INPUT_MODE_MISSION_THROTTLE_PREV; + + // Rising edge trigger? + if ( !prev && held ) + { + // Toggle 'state' bit: throttle latch enable/disable. + input_mode[ iplayer ] ^= INPUT_MODE_MISSION_THROTTLE_LATCH; + + // Tell user + if ( input_mode[iplayer] & INPUT_MODE_MISSION_THROTTLE_LATCH ) + input_throttle_latch[iplayer] = (int16_t)throttle_real; + } + + // Store held state in 'previous' bit. + input_mode[ iplayer ] = ( input_mode[ iplayer ] & ~INPUT_MODE_MISSION_THROTTLE_PREV ) | held; + } + + + // + // -- format input data + + // Convert analog values into direction values. + uint16_t right = analog_x > 0 ? analog_x : 0; + uint16_t left = analog_x < 0 ? -analog_x : 0; + uint16_t down = analog_y > 0 ? analog_y : 0; + uint16_t up = analog_y < 0 ? -analog_y : 0; + uint16_t th_up = throttle > 0 ? throttle : 0; + uint16_t th_dn = throttle < 0 ? -throttle : 0; + + p_input->u8[0x2] = 0; // todo: auto-fire controls. + p_input->u8[0x3] = ((left >> 0) & 0xff); + p_input->u8[0x4] = ((left >> 8) & 0xff); + p_input->u8[0x5] = ((right >> 0) & 0xff); + p_input->u8[0x6] = ((right >> 8) & 0xff); + p_input->u8[0x7] = ((up >> 0) & 0xff); + p_input->u8[0x8] = ((up >> 8) & 0xff); + p_input->u8[0x9] = ((down >> 0) & 0xff); + p_input->u8[0xa] = ((down >> 8) & 0xff); + p_input->u8[0xb] = ((th_up >> 0) & 0xff); + p_input->u8[0xc] = ((th_up >> 8) & 0xff); + p_input->u8[0xd] = ((th_dn >> 0) & 0xff); + p_input->u8[0xe] = ((th_dn >> 8) & 0xff); + } + + break; + + case RETRO_DEVICE_SS_MISSION2: + + { + // + // -- mission stick buttons + + // input_map_mission is configured to quickly map libretro buttons to the correct bits for the Saturn. + for ( int i = 0; i < INPUT_MAP_MISSION_SIZE; ++i ) + { + if (ret & (1 << input_map_mission[ i ])) + p_input->buttons |= ( 1 << i ); + } + // .. the left trigger is a special case, there's a gap in the bits. + if (ret & (1 << input_map_mission_left_shoulder)) + p_input->buttons |= ( 1 << 11 ); + + // + // -- analog sticks + + int analog1_x, analog1_y; + int analog2_x, analog2_y; + + // Default - patent shows first stick on right side, second added on left + // see: https://segaretro.org/images/a/a1/Patent_EP0745928A2.pdf + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_RIGHT, &analog1_x, &analog1_y ); + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_LEFT, &analog2_x, &analog2_y ); + + // + // -- format input data + + // Convert analog values into direction values. + uint16_t right1 = analog1_x > 0 ? analog1_x : 0; + uint16_t left1 = analog1_x < 0 ? -analog1_x : 0; + uint16_t down1 = analog1_y > 0 ? analog1_y : 0; + uint16_t up1 = analog1_y < 0 ? -analog1_y : 0; + + uint16_t right2 = analog2_x > 0 ? analog2_x : 0; + uint16_t left2 = analog2_x < 0 ? -analog2_x : 0; + uint16_t down2 = analog2_y > 0 ? analog2_y : 0; + uint16_t up2 = analog2_y < 0 ? -analog2_y : 0; + + p_input->u8[ 0x2] = 0; // todo: auto-fire controls. + + p_input->u8[ 0x3] = ((left1 >> 0) & 0xff); + p_input->u8[ 0x4] = ((left1 >> 8) & 0xff); + p_input->u8[ 0x5] = ((right1 >> 0) & 0xff); + p_input->u8[ 0x6] = ((right1 >> 8) & 0xff); + p_input->u8[ 0x7] = ((up1 >> 0) & 0xff); + p_input->u8[ 0x8] = ((up1 >> 8) & 0xff); + p_input->u8[ 0x9] = ((down1 >> 0) & 0xff); + p_input->u8[ 0xa] = ((down1 >> 8) & 0xff); + p_input->u8[ 0xb] = 0; // todo: throttle1 + p_input->u8[ 0xc] = 0; + p_input->u8[ 0xd] = 0; + p_input->u8[ 0xe] = 0; + + p_input->u8[ 0xf] = ((left2 >> 0) & 0xff); + p_input->u8[0x10] = ((left2 >> 8) & 0xff); + p_input->u8[0x11] = ((right2 >> 0) & 0xff); + p_input->u8[0x12] = ((right2 >> 8) & 0xff); + p_input->u8[0x13] = ((up2 >> 0) & 0xff); + p_input->u8[0x14] = ((up2 >> 8) & 0xff); + p_input->u8[0x15] = ((down2 >> 0) & 0xff); + p_input->u8[0x16] = ((down2 >> 8) & 0xff); + p_input->u8[0x17] = 0; // todo: throttle2 + p_input->u8[0x18] = 0; + p_input->u8[0x19] = 0; + p_input->u8[0x1a] = 0; + } + + break; + + case RETRO_DEVICE_SS_GUN_JP: + case RETRO_DEVICE_SS_GUN_US: + + { + if ( setting_gun_input == SETTING_GUN_INPUT_POINTER ) + { + int gun_x_raw = input_state_cb( iplayer, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_X); + int gun_y_raw = input_state_cb( iplayer, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_Y); + + // .. scale into screen space: + // NOTE: the scaling here is empirical-guesswork. + // Tested at 352x240 (ntsc) and 352x256 (pal) + + const int scale_x = 21472; + const int scale_y = geometry_height; + const int offset_y = geometry_height - 240; + + int is_offscreen = 0; + + int gun_x = ( ( gun_x_raw + 0x7fff ) * scale_x ) / (0x7fff << 1); + int gun_y = ( ( gun_y_raw + 0x7fff ) * scale_y ) / (0x7fff << 1) + offset_y; + + // Handle offscreen by checking corrected x and y values + if ( gun_x == 0 || gun_y == 0 ) + { + is_offscreen = 1; + gun_x = -16384; // magic position to disable cross-hair drawing. + gun_y = -16384; + } + + // Touch sensitivity: Keep the gun position held for a fixed number of cycles after touch is released + // because a very light touch can result in a misfire + if ( pointer_cycles_after_released > 0 && pointer_cycles_after_released < POINTER_PRESSED_CYCLES ) { + pointer_cycles_after_released++; + p_input->gun_pos[ 0 ] = pointer_pressed_last_x; + p_input->gun_pos[ 1 ] = pointer_pressed_last_y; + return; + } + + // trigger + if ( input_state_cb( iplayer, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_PRESSED ) ) + { + pointer_pressed = 1; + pointer_cycles_after_released = 0; + pointer_pressed_last_x = gun_x; + pointer_pressed_last_y = gun_y; + } else if ( pointer_pressed ) { + pointer_cycles_after_released++; + pointer_pressed = 0; + p_input->gun_pos[ 0 ] = pointer_pressed_last_x; + p_input->gun_pos[ 1 ] = pointer_pressed_last_y; + p_input->u8[4] &= ~0x1; + return; + } + + // position + p_input->gun_pos[ 0 ] = gun_x; + p_input->gun_pos[ 1 ] = gun_y; + + // buttons + p_input->u8[ 4 ] = 0; + + // use multi-touch to support different button inputs: + // 3-finger touch: START button + // 2-finger touch: Reload + // offscreen touch: Reload + int touch_count = input_state_cb( iplayer, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_COUNT ); + if ( touch_count == 3 ) + p_input->u8[ 4 ] |= 0x2; + else if ( touch_count == 2 ) + p_input->u8[ 4 ] |= 0x4; + else if ( touch_count == 1 && is_offscreen ) + p_input->u8[ 4 ] |= 0x4; + else if ( touch_count == 1 ) + p_input->u8[ 4 ] |= 0x1; + + } else { // Lightgun input is default + uint8_t shot_type; + int gun_x, gun_y; + int forced_reload = input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_RELOAD ); + + // off-screen? + if ( input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_IS_OFFSCREEN ) || + forced_reload || + geometry_height == 0 ) + { + shot_type = 0x4; // off-screen shot + + gun_x = -16384; // magic position to disable cross-hair drawing. + gun_y = -16384; + } + else + { + shot_type = 0x1; // on-screen shot + + int gun_x_raw = input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_X ); + int gun_y_raw = input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_Y ); + + // .. scale into screen space: + // NOTE: the scaling here is empirical-guesswork. + // Tested at 352x240 (ntsc) and 352x256 (pal) + + const int scale_x = 21472; + const int scale_y = geometry_height; + const int offset_y = geometry_height - 240; + + gun_x = ( ( gun_x_raw + 0x7fff ) * scale_x ) / (0x7fff << 1); + gun_y = ( ( gun_y_raw + 0x7fff ) * scale_y ) / (0x7fff << 1) + offset_y; + } + + // position + p_input->gun_pos[ 0 ] = gun_x; + p_input->gun_pos[ 1 ] = gun_y; + + // buttons + p_input->u8[ 4 ] = 0; + + // trigger + if ( input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_TRIGGER ) || forced_reload ) + p_input->u8[ 4 ] |= shot_type; + + // start + if ( input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_START ) ) + p_input->u8[ 4 ] |= 0x2; + + } + + } + + break; + + } // switch ( input_type[ iplayer ] ) + + } // for each player +} + +void input_update( retro_input_state_t input_state_cb ) +{ + // For each player (logical controller) + for ( unsigned iplayer = 0; iplayer < players; ++iplayer ) + { + INPUT_DATA* p_input = &(input_data[ iplayer ]); + + // reset input + p_input->buttons = 0; + + // What kind of controller is connected? + switch ( input_type[ iplayer ] ) + { + + case RETRO_DEVICE_JOYPAD: + case RETRO_DEVICE_SS_PAD: + + // + // -- standard control pad buttons + d-pad + + // input_map_pad is configured to quickly map libretro buttons to the correct bits for the Saturn. + for ( int i = 0; i < INPUT_MAP_PAD_SIZE; ++i ) + p_input->buttons |= input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_pad[ i ] ) ? ( 1 << i ) : 0; + // .. the left trigger on the Saturn is a special case since there's a gap in the bits. + p_input->buttons |= + input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_pad_left_shoulder ) ? ( 1 << 15 ) : 0; + + if (!opposite_directions) + { + if ((p_input->buttons & 0x30) == 0x30) + p_input->buttons &= ~0x30; + if ((p_input->buttons & 0xC0) == 0xC0) + p_input->buttons &= ~0xC0; + } + break; + + case RETRO_DEVICE_SS_TWINSTICK: + + { + int analog_lx, analog_ly; + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_LEFT, &analog_lx, &analog_ly ); + int analog_rx, analog_ry; + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_RIGHT, &analog_rx, &analog_ry ); + + const int thresh = 16000; + + // left-stick + if ( analog_ly <= -thresh ) + p_input->buttons |= ( 1 << 4 ); // Up + if ( analog_lx >= thresh ) + p_input->buttons |= ( 1 << 7 ); // Right + if ( analog_ly >= thresh ) + p_input->buttons |= ( 1 << 5 ); // Down + if ( analog_lx <= -thresh ) + p_input->buttons |= ( 1 << 6 ); // Left + + // right-stick + if ( analog_ry <= -thresh ) + p_input->buttons |= ( 1 << 1 ); // Up <-(Y) + if ( analog_rx >= thresh ) + p_input->buttons |= ( 1 << 0 ); // Right <-(Z) + if ( analog_ry >= thresh ) + p_input->buttons |= ( 1 << 8 ); // Down <-(B) + if ( analog_rx <= -thresh ) + p_input->buttons |= ( 1 << 2 ); // Left <-(X) + + // left trigger + p_input->buttons |= + input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_twinstick_left_trigger ) ? ( 1 << 15 ) : 0; + + // left button + p_input->buttons |= + input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_twinstick_left_button ) ? ( 1 << 3 ) : 0; + + // right trigger + p_input->buttons |= + input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_twinstick_right_trigger ) ? ( 1 << 10 ) : 0; + + // right button + p_input->buttons |= + input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_twinstick_right_button ) ? ( 1 << 9 ) : 0; + + // start + p_input->buttons |= + input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START ) ? ( 1 << 11 ) : 0; + } + break; + + case RETRO_DEVICE_SS_3D_PAD: + + { + // + // -- 3d control pad buttons + + // input_map_3d_pad is configured to quickly map libretro buttons to the correct bits for the Saturn. + + for ( int i = 0; i < INPUT_MAP_3D_PAD_SIZE; ++i ) + p_input->buttons |= input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_3d_pad[ i ] ) ? ( 1 << i ) : 0; + + // + // -- analog stick + + int analog_x, analog_y; + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_LEFT, &analog_x, &analog_y ); + + // mednafen wants 0 - 32767 - 65535 + uint16_t thumb_x, thumb_y; + thumb_x = static_cast< uint16_t >( analog_x + 32767 ); + thumb_y = static_cast< uint16_t >( analog_y + 32767 ); + + // + // -- triggers + + // mednafen wants 0 - 65535 + uint16_t l_trigger, r_trigger; + l_trigger = get_analog_trigger( input_state_cb, iplayer, RETRO_DEVICE_ID_JOYPAD_L2 ); + r_trigger = get_analog_trigger( input_state_cb, iplayer, RETRO_DEVICE_ID_JOYPAD_R2 ); + + + // + // -- mode switch + + { + // Handle MODE button as a switch + uint16_t prev = ( input_mode[iplayer] & INPUT_MODE_3D_PAD_PREVIOUS_MASK ); + uint16_t held = 0; + + if (input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_3d_pad_mode_switch )) + held = INPUT_MODE_3D_PAD_PREVIOUS_MASK; + + // Rising edge trigger + if ( !prev && held ) + { + char text[ 256 ]; + // Toggle 'state' bit: analog/digital mode + input_mode[ iplayer ] ^= INPUT_MODE_3D_PAD_ANALOG; + + // Tell user + if ( input_mode[iplayer] & INPUT_MODE_3D_PAD_ANALOG ) + sprintf( text, "Controller %u: Analog Mode", (iplayer+1) ); + else + sprintf( text, "Controller %u: Digital Mode", (iplayer+1) ); + struct retro_message msg = { text, 180 }; + environ_cb( RETRO_ENVIRONMENT_SET_MESSAGE, &msg ); + } + + // Store held state in 'previous' bit. + input_mode[ iplayer ] = ( input_mode[ iplayer ] & ~INPUT_MODE_3D_PAD_PREVIOUS_MASK ) | held; + } + + // + // -- format input data + + // Apply analog/digital mode switch bit. + if ( input_mode[iplayer] & INPUT_MODE_3D_PAD_ANALOG ) { + p_input->buttons |= 0x1000; // set bit 12 + } + + p_input->u8[0x2] = ((thumb_x >> 0) & 0xff); + p_input->u8[0x3] = ((thumb_x >> 8) & 0xff); + p_input->u8[0x4] = ((thumb_y >> 0) & 0xff); + p_input->u8[0x5] = ((thumb_y >> 8) & 0xff); + p_input->u8[0x6] = ((r_trigger >> 0) & 0xff); + p_input->u8[0x7] = ((r_trigger >> 8) & 0xff); + p_input->u8[0x8] = ((l_trigger >> 0) & 0xff); + p_input->u8[0x9] = ((l_trigger >> 8) & 0xff); + } + + break; + + case RETRO_DEVICE_SS_WHEEL: + + { + // + // -- Wheel buttons + + // input_map_wheel is configured to quickly map libretro buttons to the correct bits for the Saturn. + for ( int i = 0; i < INPUT_MAP_WHEEL_SIZE; ++i ) { + const uint16_t bit = ( 1 << ( i + INPUT_MAP_WHEEL_BITSHIFT ) ); + p_input->buttons |= input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_wheel[ i ] ) ? bit : 0; + } + + // shift-paddles + p_input->buttons |= input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_wheel_shift_left ) ? ( 1 << 0 ) : 0; + p_input->buttons |= input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_wheel_shift_right ) ? ( 1 << 1 ) : 0; + + // + // -- analog wheel + + int analog_x; + get_analog_axis( input_state_cb, iplayer, + RETRO_DEVICE_INDEX_ANALOG_LEFT, + RETRO_DEVICE_ID_ANALOG_X, &analog_x ); + + // + // -- format input data + + // Convert analog values into direction values. + uint16_t right = analog_x > 0 ? analog_x : 0; + uint16_t left = analog_x < 0 ? -analog_x : 0; + + p_input->u8[0x2] = ((left >> 0) & 0xff); + p_input->u8[0x3] = ((left >> 8) & 0xff); + p_input->u8[0x4] = ((right >> 0) & 0xff); + p_input->u8[0x5] = ((right >> 8) & 0xff); + } + + break; + + case RETRO_DEVICE_SS_MOUSE: + + { + // mouse buttons + p_input->u8[0x4] = 0; + + if ( input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT ) ) { + p_input->u8[0x4] |= ( 1 << 0 ); // A + } + + if ( input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_RIGHT ) ) { + p_input->u8[0x4] |= ( 1 << 1 ); // B + } + + if ( input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_MIDDLE ) ) { + p_input->u8[0x4] |= ( 1 << 2 ); // C + } + + if ( input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START ) || + input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_BUTTON_4 ) || + input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_BUTTON_5 ) ) { + p_input->u8[0x4] |= ( 1 << 3 ); // Start + } + + // mouse input + int dx_raw, dy_raw; + dx_raw = input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_X ); + dy_raw = input_state_cb( iplayer, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_Y ); + + int16_t *delta; + delta = (int16_t*)p_input; + delta[ 0 ] = (int16_t)roundf( dx_raw * mouse_sensitivity ); + delta[ 1 ] = (int16_t)roundf( dy_raw * mouse_sensitivity ); + } + + break; + + case RETRO_DEVICE_SS_MISSION: + + { + // + // -- mission stick buttons + + // input_map_mission is configured to quickly map libretro buttons to the correct bits for the Saturn. + for ( int i = 0; i < INPUT_MAP_MISSION_SIZE; ++i ) + p_input->buttons |= input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_mission[ i ] ) ? ( 1 << i ) : 0; + // .. the left trigger is a special case, there's a gap in the bits. + p_input->buttons |= + input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_mission_left_shoulder ) ? ( 1 << 11 ) : 0; + + // + // -- analog stick + + int analog_x, analog_y; + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_LEFT, &analog_x, &analog_y ); + + // + // -- throttle + + int throttle_real; + get_analog_axis( input_state_cb, iplayer, + RETRO_DEVICE_INDEX_ANALOG_RIGHT, + RETRO_DEVICE_ID_ANALOG_Y, &throttle_real ); + + int16_t throttle; + if ( input_mode[iplayer] & INPUT_MODE_MISSION_THROTTLE_LATCH ) + { + // Use latched value + throttle = input_throttle_latch[iplayer]; + } + else + { + // Direct read + throttle = throttle_real; + } + + + // + // -- throttle latch switch + + { + // Handle MODE button as a switch + uint16_t prev = ( input_mode[iplayer] & INPUT_MODE_MISSION_THROTTLE_PREV ); + uint16_t held = 0; + + if (input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_mission_throttle_latch)) + held = INPUT_MODE_MISSION_THROTTLE_PREV; + + // Rising edge trigger? + if ( !prev && held ) + { + // Toggle 'state' bit: throttle latch enable/disable. + input_mode[ iplayer ] ^= INPUT_MODE_MISSION_THROTTLE_LATCH; + + // Tell user + if ( input_mode[iplayer] & INPUT_MODE_MISSION_THROTTLE_LATCH ) { + log_cb( RETRO_LOG_INFO, "Controller %u: Latched Throttle at %d\n", (iplayer+1), throttle_real ); + input_throttle_latch[iplayer] = (int16_t)throttle_real; + } else { + log_cb( RETRO_LOG_INFO, "Controller %u: Remove Throttle Latch\n", (iplayer+1) ); + } + } + + // Store held state in 'previous' bit. + input_mode[ iplayer ] = ( input_mode[ iplayer ] & ~INPUT_MODE_MISSION_THROTTLE_PREV ) | held; + } + + + // + // -- format input data + + // Convert analog values into direction values. + uint16_t right = analog_x > 0 ? analog_x : 0; + uint16_t left = analog_x < 0 ? -analog_x : 0; + uint16_t down = analog_y > 0 ? analog_y : 0; + uint16_t up = analog_y < 0 ? -analog_y : 0; + uint16_t th_up = throttle > 0 ? throttle : 0; + uint16_t th_dn = throttle < 0 ? -throttle : 0; + + p_input->u8[0x2] = 0; // todo: auto-fire controls. + p_input->u8[0x3] = ((left >> 0) & 0xff); + p_input->u8[0x4] = ((left >> 8) & 0xff); + p_input->u8[0x5] = ((right >> 0) & 0xff); + p_input->u8[0x6] = ((right >> 8) & 0xff); + p_input->u8[0x7] = ((up >> 0) & 0xff); + p_input->u8[0x8] = ((up >> 8) & 0xff); + p_input->u8[0x9] = ((down >> 0) & 0xff); + p_input->u8[0xa] = ((down >> 8) & 0xff); + p_input->u8[0xb] = ((th_up >> 0) & 0xff); + p_input->u8[0xc] = ((th_up >> 8) & 0xff); + p_input->u8[0xd] = ((th_dn >> 0) & 0xff); + p_input->u8[0xe] = ((th_dn >> 8) & 0xff); + } + + break; + + case RETRO_DEVICE_SS_MISSION2: + + { + // + // -- mission stick buttons + + // input_map_mission is configured to quickly map libretro buttons to the correct bits for the Saturn. + for ( int i = 0; i < INPUT_MAP_MISSION_SIZE; ++i ) + p_input->buttons |= input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_mission[ i ] ) ? ( 1 << i ) : 0; + // .. the left trigger is a special case, there's a gap in the bits. + p_input->buttons |= + input_state_cb( iplayer, RETRO_DEVICE_JOYPAD, 0, input_map_mission_left_shoulder ) ? ( 1 << 11 ) : 0; + + // + // -- analog sticks + + int analog1_x, analog1_y; + int analog2_x, analog2_y; + + // Default - patent shows first stick on right side, second added on left + // see: https://segaretro.org/images/a/a1/Patent_EP0745928A2.pdf + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_RIGHT, &analog1_x, &analog1_y ); + get_analog_stick( input_state_cb, iplayer, RETRO_DEVICE_INDEX_ANALOG_LEFT, &analog2_x, &analog2_y ); + + // + // -- format input data + + // Convert analog values into direction values. + uint16_t right1 = analog1_x > 0 ? analog1_x : 0; + uint16_t left1 = analog1_x < 0 ? -analog1_x : 0; + uint16_t down1 = analog1_y > 0 ? analog1_y : 0; + uint16_t up1 = analog1_y < 0 ? -analog1_y : 0; + + uint16_t right2 = analog2_x > 0 ? analog2_x : 0; + uint16_t left2 = analog2_x < 0 ? -analog2_x : 0; + uint16_t down2 = analog2_y > 0 ? analog2_y : 0; + uint16_t up2 = analog2_y < 0 ? -analog2_y : 0; + + p_input->u8[ 0x2] = 0; // todo: auto-fire controls. + + p_input->u8[ 0x3] = ((left1 >> 0) & 0xff); + p_input->u8[ 0x4] = ((left1 >> 8) & 0xff); + p_input->u8[ 0x5] = ((right1 >> 0) & 0xff); + p_input->u8[ 0x6] = ((right1 >> 8) & 0xff); + p_input->u8[ 0x7] = ((up1 >> 0) & 0xff); + p_input->u8[ 0x8] = ((up1 >> 8) & 0xff); + p_input->u8[ 0x9] = ((down1 >> 0) & 0xff); + p_input->u8[ 0xa] = ((down1 >> 8) & 0xff); + p_input->u8[ 0xb] = 0; // todo: throttle1 + p_input->u8[ 0xc] = 0; + p_input->u8[ 0xd] = 0; + p_input->u8[ 0xe] = 0; + + p_input->u8[ 0xf] = ((left2 >> 0) & 0xff); + p_input->u8[0x10] = ((left2 >> 8) & 0xff); + p_input->u8[0x11] = ((right2 >> 0) & 0xff); + p_input->u8[0x12] = ((right2 >> 8) & 0xff); + p_input->u8[0x13] = ((up2 >> 0) & 0xff); + p_input->u8[0x14] = ((up2 >> 8) & 0xff); + p_input->u8[0x15] = ((down2 >> 0) & 0xff); + p_input->u8[0x16] = ((down2 >> 8) & 0xff); + p_input->u8[0x17] = 0; // todo: throttle2 + p_input->u8[0x18] = 0; + p_input->u8[0x19] = 0; + p_input->u8[0x1a] = 0; + } + + break; + + case RETRO_DEVICE_SS_GUN_JP: + case RETRO_DEVICE_SS_GUN_US: + + { + if ( setting_gun_input == SETTING_GUN_INPUT_POINTER ) { + int gun_x, gun_y; + int gun_x_raw, gun_y_raw; + gun_x_raw = input_state_cb( iplayer, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_X); + gun_y_raw = input_state_cb( iplayer, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_Y); + + // .. scale into screen space: + // NOTE: the scaling here is empirical-guesswork. + // Tested at 352x240 (ntsc) and 352x256 (pal) + + const int scale_x = 21472; + const int scale_y = geometry_height; + const int offset_y = geometry_height - 240; + + int is_offscreen = 0; + + gun_x = ( ( gun_x_raw + 0x7fff ) * scale_x ) / (0x7fff << 1); + gun_y = ( ( gun_y_raw + 0x7fff ) * scale_y ) / (0x7fff << 1) + offset_y; + + // Handle offscreen by checking corrected x and y values + if ( gun_x == 0 || gun_y == 0 ) + { + is_offscreen = 1; + gun_x = -16384; // magic position to disable cross-hair drawing. + gun_y = -16384; + } + + // Touch sensitivity: Keep the gun position held for a fixed number of cycles after touch is released + // because a very light touch can result in a misfire + if ( pointer_cycles_after_released > 0 && pointer_cycles_after_released < POINTER_PRESSED_CYCLES ) { + pointer_cycles_after_released++; + p_input->gun_pos[ 0 ] = pointer_pressed_last_x; + p_input->gun_pos[ 1 ] = pointer_pressed_last_y; + return; + } + + // trigger + if ( input_state_cb( iplayer, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_PRESSED ) ) + { + pointer_pressed = 1; + pointer_cycles_after_released = 0; + pointer_pressed_last_x = gun_x; + pointer_pressed_last_y = gun_y; + } else if ( pointer_pressed ) { + pointer_cycles_after_released++; + pointer_pressed = 0; + p_input->gun_pos[ 0 ] = pointer_pressed_last_x; + p_input->gun_pos[ 1 ] = pointer_pressed_last_y; + p_input->u8[4] &= ~0x1; + return; + } + + // position + p_input->gun_pos[ 0 ] = gun_x; + p_input->gun_pos[ 1 ] = gun_y; + + // buttons + p_input->u8[ 4 ] = 0; + + // use multi-touch to support different button inputs: + // 3-finger touch: START button + // 2-finger touch: Reload + // offscreen touch: Reload + int touch_count = input_state_cb( iplayer, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_COUNT ); + if ( touch_count == 3 ) + p_input->u8[ 4 ] |= 0x2; + else if ( touch_count == 2 ) + p_input->u8[ 4 ] |= 0x4; + else if ( touch_count == 1 && is_offscreen ) + p_input->u8[ 4 ] |= 0x4; + else if ( touch_count == 1 ) + p_input->u8[ 4 ] |= 0x1; + + } else { // Lightgun input is default + uint8_t shot_type; + int gun_x, gun_y; + int forced_reload = input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_RELOAD ); + + // off-screen? + if ( input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_IS_OFFSCREEN ) || + forced_reload || + geometry_height == 0 ) + { + shot_type = 0x4; // off-screen shot + + gun_x = -16384; // magic position to disable cross-hair drawing. + gun_y = -16384; + } + else + { + shot_type = 0x1; // on-screen shot + + int gun_x_raw = input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_X ); + int gun_y_raw = input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_SCREEN_Y ); + + // .. scale into screen space: + // NOTE: the scaling here is empirical-guesswork. + // Tested at 352x240 (ntsc) and 352x256 (pal) + + const int scale_x = 21472; + const int scale_y = geometry_height; + const int offset_y = geometry_height - 240; + + gun_x = ( ( gun_x_raw + 0x7fff ) * scale_x ) / (0x7fff << 1); + gun_y = ( ( gun_y_raw + 0x7fff ) * scale_y ) / (0x7fff << 1) + offset_y; + } + + // position + p_input->gun_pos[ 0 ] = gun_x; + p_input->gun_pos[ 1 ] = gun_y; + + // buttons + p_input->u8[ 4 ] = 0; + + // trigger + if ( input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_TRIGGER ) || forced_reload ) + p_input->u8[ 4 ] |= shot_type; + + // start + if ( input_state_cb( iplayer, RETRO_DEVICE_LIGHTGUN, 0, RETRO_DEVICE_ID_LIGHTGUN_START ) ) + p_input->u8[ 4 ] |= 0x2; + + } + + } + + break; + + } // switch ( input_type[ iplayer ] ) + + } // for each player +} + +// save state function for input +int input_StateAction( StateMem* sm, const unsigned load, const bool data_only ) +{ + int success; + + SFORMAT StateRegs[] = + { + SFPTR16N( input_mode, MAX_CONTROLLERS, "pad-mode" ), + SFPTR16N( input_throttle_latch, MAX_CONTROLLERS, "throttle-latch" ), + SFEND + }; + + success = MDFNSS_StateAction( sm, load, data_only, StateRegs, "LIBRETRO-INPUT", false); + + // ok? + return success; +} + +//------------------------------------------------------------------------------ +// Libretro Interface +//------------------------------------------------------------------------------ + +void retro_set_controller_port_device( unsigned in_port, unsigned device ) +{ + if ( in_port < MAX_CONTROLLERS ) + { + // Store input type + input_type[ in_port ] = device; + input_mode[ in_port ] = INPUT_MODE_DEFAULT; + + switch ( device ) + { + + case RETRO_DEVICE_NONE: + log_cb( RETRO_LOG_INFO, "Controller %u: Unplugged\n", (in_port+1) ); + SMPC_SetInput( in_port, "none", (uint8*)&input_data[ in_port ] ); + break; + + case RETRO_DEVICE_JOYPAD: + case RETRO_DEVICE_SS_PAD: + log_cb( RETRO_LOG_INFO, "Controller %u: Control Pad\n", (in_port+1) ); + SMPC_SetInput( in_port, "gamepad", (uint8*)&input_data[ in_port ] ); + break; + + case RETRO_DEVICE_SS_3D_PAD: + log_cb( RETRO_LOG_INFO, "Controller %u: 3D Control Pad\n", (in_port+1) ); + SMPC_SetInput( in_port, "3dpad", (uint8*)&input_data[ in_port ] ); + input_mode[ in_port ] = INPUT_MODE_DEFAULT_3D_PAD; + break; + + case RETRO_DEVICE_SS_WHEEL: + log_cb( RETRO_LOG_INFO, "Controller %u: Arcade Racer\n", (in_port+1) ); + SMPC_SetInput( in_port, "wheel", (uint8*)&input_data[ in_port ] ); + break; + + case RETRO_DEVICE_SS_MISSION: + log_cb( RETRO_LOG_INFO, "Controller %u: Mission Stick\n", (in_port+1) ); + SMPC_SetInput( in_port, "mission", (uint8*)&input_data[ in_port ] ); + break; + + case RETRO_DEVICE_SS_MISSION2: + log_cb( RETRO_LOG_INFO, "Controller %u: Dual Mission Sticks\n", (in_port+1) ); + SMPC_SetInput( in_port, "dmission", (uint8*)&input_data[ in_port ] ); + break; + + case RETRO_DEVICE_SS_MOUSE: + log_cb( RETRO_LOG_INFO, "Controller %u: Mouse\n", (in_port+1) ); + SMPC_SetInput( in_port, "mouse", (uint8*)&input_data[ in_port ] ); + break; + + case RETRO_DEVICE_SS_GUN_US: + log_cb( RETRO_LOG_INFO, "Controller %u: Stunner\n", (in_port+1) ); + SMPC_SetInput( in_port, "gun", (uint8*)&input_data[ in_port ] ); + break; + + case RETRO_DEVICE_SS_GUN_JP: + log_cb( RETRO_LOG_INFO, "Controller %u: Virtua Gun\n", (in_port+1) ); + SMPC_SetInput( in_port, "gun", (uint8*)&input_data[ in_port ] ); + break; + + case RETRO_DEVICE_SS_TWINSTICK: + log_cb( RETRO_LOG_INFO, "Controller %u: Twin-Stick\n", (in_port+1) ); + SMPC_SetInput( in_port, "gamepad", (uint8*)&input_data[ in_port ] ); + break; + + default: + log_cb( RETRO_LOG_WARN, "Controller %u: Unsupported Device (%u)\n", (in_port+1), device ); + SMPC_SetInput( in_port, "none", (uint8*)&input_data[ in_port ] ); + break; + + }; // switch ( device ) + + }; // valid port? +} + +void input_multitap( int port, bool enabled ) +{ + switch ( port ) + { + case 1: // PORT 1 + if ( enabled != setting_multitap_port1 ) { + setting_multitap_port1 = enabled; + if ( setting_multitap_port1 ) { + log_cb( RETRO_LOG_INFO, "Connected 6Player Adaptor to Port 1\n" ); + } else { + log_cb( RETRO_LOG_INFO, "Removed 6Player Adaptor from Port 1\n" ); + } + SMPC_SetMultitap( 0, setting_multitap_port1 ); + } + break; + + case 2: // PORT 2 + if ( enabled != setting_multitap_port2 ) { + setting_multitap_port2 = enabled; + if ( setting_multitap_port2 ) { + log_cb( RETRO_LOG_INFO, "Connected 6Player Adaptor to Port 2\n" ); + } else { + log_cb( RETRO_LOG_INFO, "Removed 6Player Adaptor from Port 2\n" ); + } + SMPC_SetMultitap( 1, setting_multitap_port2 ); + } + break; + + }; // switch ( port ) + + // Update players count + players = 2; + if ( setting_multitap_port1 ) { + players += 5; + } + if ( setting_multitap_port2 ) { + players += 5; + } + + /*Tell front-end*/ + input_set_env( environ_cb ); +} + +//============================================================================== diff --git a/cores/saturn/input.h b/cores/saturn/input.h new file mode 100644 index 000000000..81993f42d --- /dev/null +++ b/cores/saturn/input.h @@ -0,0 +1,30 @@ +#ifndef __INPUT_H__ +#define __INPUT_H__ + +#include "libretro.h" +#include "mednafen/state.h" + +// These input routines tell libretro about Saturn peripherals +// and map input from the abstract 'retropad' into Saturn land. + +void input_init_env( retro_environment_t environ_cb ); + +void input_init(void); + +void input_set_geometry( unsigned width, unsigned height ); + +void input_set_env( retro_environment_t environ_cb ); + +void input_set_deadzone_stick( int percent ); +void input_set_deadzone_trigger( int percent ); +void input_set_mouse_sensitivity( int percent ); + +void input_update( retro_input_state_t input_state_cb); +void input_update_with_bitmasks( retro_input_state_t input_state_cb ); + +// save state function for input +int input_StateAction( StateMem* sm, const unsigned load, const bool data_only ); + +void input_multitap( int port, bool enabled ); + +#endif diff --git a/cores/saturn/jni/Android.mk b/cores/saturn/jni/Android.mk new file mode 100644 index 000000000..a05055126 --- /dev/null +++ b/cores/saturn/jni/Android.mk @@ -0,0 +1,37 @@ +LOCAL_PATH := $(call my-dir) + +CORE_DIR := $(LOCAL_PATH)/.. + +DEBUG := 0 +NEED_CD := 1 +NEED_BPP := 32 +NEED_DEINTERLACER := 1 +NEED_THREADING := 1 +NEED_TREMOR := 1 +HAVE_CHD := 1 +FLAGS := + +include $(CORE_DIR)/Makefile.common + +COREFLAGS := -funroll-loops $(INCFLAGS) -D__LIBRETRO__ -D_LOW_ACCURACY_ -DINLINE="inline" $(FLAGS) +COREFLAGS += -DWANT_SATURN_EMU + +GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)" +ifneq ($(GIT_VERSION)," unknown") + COREFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\" +endif + +include $(CLEAR_VARS) +LOCAL_MODULE := retro +LOCAL_SRC_FILES := $(SOURCES_CXX) $(SOURCES_C) +LOCAL_CFLAGS := $(COREFLAGS) +LOCAL_CXXFLAGS := $(COREFLAGS) -std=c++11 +LOCAL_LDFLAGS := -Wl,-version-script=$(CORE_DIR)/link.T +LOCAL_CPP_FEATURES := exceptions + +# armv5 clang workarounds +ifeq ($(TARGET_ARCH_ABI),armeabi) + LOCAL_LDLIBS := -latomic +endif + +include $(BUILD_SHARED_LIBRARY) diff --git a/cores/saturn/jni/Application.mk b/cores/saturn/jni/Application.mk new file mode 100644 index 000000000..8e1f6980c --- /dev/null +++ b/cores/saturn/jni/Application.mk @@ -0,0 +1,2 @@ +APP_STL := c++_static +APP_ABI := all diff --git a/cores/saturn/libretro-common/cdrom/cdrom.c b/cores/saturn/libretro-common/cdrom/cdrom.c new file mode 100644 index 000000000..90cb9b062 --- /dev/null +++ b/cores/saturn/libretro-common/cdrom/cdrom.c @@ -0,0 +1,1745 @@ +/* Copyright (C) 2010-2020 The RetroArch team +* +* --------------------------------------------------------------------------------------- +* The following license statement only applies to this file (cdrom.c). +* --------------------------------------------------------------------------------------- +* +* Permission is hereby granted, free of charge, +* to any person obtaining a copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation the rights to +* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +* and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#ifdef _WIN32 +#include +#else +#include +#endif + +#if defined(__linux__) && !defined(ANDROID) +#include +#include +#endif + +#if defined(_WIN32) && !defined(_XBOX) +#include +#include +#include +#endif + +#define CDROM_CUE_TRACK_BYTES 107 +#define CDROM_MAX_SENSE_BYTES 16 +#define CDROM_MAX_RETRIES 10 + +typedef enum +{ + DIRECTION_NONE, + DIRECTION_IN, + DIRECTION_OUT +} CDROM_CMD_Direction; + +void cdrom_lba_to_msf(unsigned lba, unsigned char *min, unsigned char *sec, unsigned char *frame) +{ + if (!min || !sec || !frame) + return; + + *frame = lba % 75; + lba /= 75; + *sec = lba % 60; + lba /= 60; + *min = lba; +} + +unsigned cdrom_msf_to_lba(unsigned char min, unsigned char sec, unsigned char frame) +{ + return (min * 60 + sec) * 75 + frame; +} + +void increment_msf(unsigned char *min, unsigned char *sec, unsigned char *frame) +{ + if (!min || !sec || !frame) + return; + + *min = (*frame == 74) ? (*sec < 59 ? *min : *min + 1) : *min; + *sec = (*frame == 74) ? (*sec < 59 ? (*sec + 1) : 0) : *sec; + *frame = (*frame < 74) ? (*frame + 1) : 0; +} + +#ifdef CDROM_DEBUG +static void cdrom_print_sense_data(const unsigned char *sense, size_t len) +{ + unsigned i; + const char *sense_key_text = NULL; + unsigned char key; + unsigned char asc; + unsigned char ascq; + + if (len < 16) + { + printf("[CDROM] Sense data buffer length too small.\n"); + fflush(stdout); + return; + } + + key = sense[2] & 0xF; + asc = sense[12]; + ascq = sense[13]; + + printf("[CDROM] Sense Data: "); + + for (i = 0; i < MIN(len, 16); i++) + { + printf("%02X ", sense[i]); + } + + printf("\n"); + + if (sense[0] == 0x70) + printf("[CDROM] CURRENT ERROR:\n"); + if (sense[0] == 0x71) + printf("[CDROM] DEFERRED ERROR:\n"); + + switch (key) + { + case 0: + sense_key_text = "NO SENSE"; + break; + case 1: + sense_key_text = "RECOVERED ERROR"; + break; + case 2: + sense_key_text = "NOT READY"; + break; + case 3: + sense_key_text = "MEDIUM ERROR"; + break; + case 4: + sense_key_text = "HARDWARE ERROR"; + break; + case 5: + sense_key_text = "ILLEGAL REQUEST"; + break; + case 6: + sense_key_text = "UNIT ATTENTION"; + break; + case 7: + sense_key_text = "DATA PROTECT"; + break; + case 8: + sense_key_text = "BLANK CHECK"; + break; + case 9: + sense_key_text = "VENDOR SPECIFIC"; + break; + case 10: + sense_key_text = "COPY ABORTED"; + break; + case 11: + sense_key_text = "ABORTED COMMAND"; + break; + case 13: + sense_key_text = "VOLUME OVERFLOW"; + break; + case 14: + sense_key_text = "MISCOMPARE"; + break; + } + + printf("[CDROM] Sense Key: %02X (%s)\n", key, sense_key_text ? sense_key_text : "null"); + printf("[CDROM] ASC: %02X\n", asc); + printf("[CDROM] ASCQ: %02X\n", ascq); + + switch (key) + { + case 2: + { + switch (asc) + { + case 4: + { + switch (ascq) + { + case 1: + printf("[CDROM] Description: LOGICAL UNIT IS IN PROCESS OF BECOMING READY\n"); + break; + default: + break; + } + + break; + } + case 0x3a: + { + switch (ascq) + { + case 0: + printf("[CDROM] Description: MEDIUM NOT PRESENT\n"); + break; + case 3: + printf("[CDROM] Description: MEDIUM NOT PRESENT - LOADABLE\n"); + break; + case 1: + printf("[CDROM] Description: MEDIUM NOT PRESENT - TRAY CLOSED\n"); + break; + case 2: + printf("[CDROM] Description: MEDIUM NOT PRESENT - TRAY OPEN\n"); + break; + default: + break; + } + + break; + } + default: + break; + } + } + case 3: + { + if (asc == 0x11 && ascq == 0x5) + printf("[CDROM] Description: L-EC UNCORRECTABLE ERROR\n"); + break; + } + case 5: + { + if (asc == 0x20 && ascq == 0) + printf("[CDROM] Description: INVALID COMMAND OPERATION CODE\n"); + else if (asc == 0x24 && ascq == 0) + printf("[CDROM] Description: INVALID FIELD IN CDB\n"); + else if (asc == 0x26 && ascq == 0) + printf("[CDROM] Description: INVALID FIELD IN PARAMETER LIST\n"); + break; + } + case 6: + { + if (asc == 0x28 && ascq == 0) + printf("[CDROM] Description: NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED\n"); + break; + } + default: + break; + } + + fflush(stdout); +} +#endif + +#if defined(_WIN32) && !defined(_XBOX) +static int cdrom_send_command_win32(const libretro_vfs_implementation_file *stream, CDROM_CMD_Direction dir, void *buf, size_t len, unsigned char *cmd, size_t cmd_len, unsigned char *sense, size_t sense_len) +{ + DWORD ioctl_bytes; + BOOL ioctl_rv; +#ifdef CDROM_DEBUG + clock_t t = clock(); + const char *extra = " "; + static unsigned char last_min = 0; + static unsigned char last_sec = 0; + static unsigned char last_frame = 0; + + unsigned lba_cur = cdrom_msf_to_lba(last_min, last_sec, last_frame); + unsigned lba_req = cdrom_msf_to_lba(cmd[3], cmd[4], cmd[5]); +#endif + struct sptd_with_sense + { + SCSI_PASS_THROUGH_DIRECT s; + UCHAR sense[128]; + } sptd; + + memset(&sptd, 0, sizeof(sptd)); + + sptd.s.Length = sizeof(sptd.s); + sptd.s.CdbLength = cmd_len; + + switch (dir) + { + case DIRECTION_IN: + sptd.s.DataIn = SCSI_IOCTL_DATA_IN; + break; + case DIRECTION_OUT: + sptd.s.DataIn = SCSI_IOCTL_DATA_OUT; + break; + case DIRECTION_NONE: + default: + sptd.s.DataIn = SCSI_IOCTL_DATA_UNSPECIFIED; + break; + } + + sptd.s.TimeOutValue = 5; + sptd.s.DataBuffer = buf; + sptd.s.DataTransferLength = len; + sptd.s.SenseInfoLength = sizeof(sptd.sense); + sptd.s.SenseInfoOffset = offsetof(struct sptd_with_sense, sense); + + memcpy(sptd.s.Cdb, cmd, cmd_len); + + ioctl_rv = DeviceIoControl(stream->fh, IOCTL_SCSI_PASS_THROUGH_DIRECT, &sptd, + sizeof(sptd), &sptd, sizeof(sptd), &ioctl_bytes, NULL); + +#ifdef CDROM_DEBUG + if (lba_req < lba_cur) + extra = " BACKWARDS SECTOR READ"; + else if (lba_req > lba_cur) + extra = " SKIPPED SECTOR READ"; + + if (cmd[0] == 0xB9) + { + double time_taken = (double)(((clock() - t) * 1000) / CLOCKS_PER_SEC); + printf("time taken %f ms for DT received length %ld of %" PRId64 " for %02d:%02d:%02d to %02d:%02d:%02d%s req %d cur %d cur_lba %d\n", time_taken, sptd.s.DataTransferLength, len, cmd[3], cmd[4], cmd[5], cmd[6], cmd[7], cmd[8], extra, lba_req, lba_cur, stream->cdrom.cur_lba); + fflush(stdout); + } + + last_min = cmd[3]; + last_sec = cmd[4]; + last_frame = cmd[5]; + increment_msf(&last_min, &last_sec, &last_frame); +#endif + + if (!ioctl_rv || sptd.s.ScsiStatus != 0) + return 1; + + return 0; +} +#endif + +#if defined(__linux__) && !defined(ANDROID) +static int cdrom_send_command_linux(const libretro_vfs_implementation_file *stream, CDROM_CMD_Direction dir, void *buf, size_t len, unsigned char *cmd, size_t cmd_len, unsigned char *sense, size_t sense_len) +{ + sg_io_hdr_t sgio = {0}; + int rv; + + switch (dir) + { + case DIRECTION_IN: + sgio.dxfer_direction = SG_DXFER_FROM_DEV; + break; + case DIRECTION_OUT: + sgio.dxfer_direction = SG_DXFER_TO_DEV; + break; + case DIRECTION_NONE: + default: + sgio.dxfer_direction = SG_DXFER_NONE; + break; + } + + sgio.interface_id = 'S'; + sgio.cmd_len = cmd_len; + sgio.cmdp = cmd; + sgio.dxferp = buf; + sgio.dxfer_len = len; + sgio.sbp = sense; + sgio.mx_sb_len = sense_len; + sgio.timeout = 5000; + + rv = ioctl(fileno(stream->fp), SG_IO, &sgio); + + if (rv == -1 || sgio.info & SG_INFO_CHECK) + return 1; + + return 0; +} +#endif + +static int cdrom_send_command(libretro_vfs_implementation_file *stream, CDROM_CMD_Direction dir, void *buf, size_t len, unsigned char *cmd, size_t cmd_len, size_t skip) +{ + unsigned char *xfer_buf = NULL; + unsigned char *xfer_buf_pos = xfer_buf; + unsigned char sense[CDROM_MAX_SENSE_BYTES] = {0}; + unsigned char retries_left = CDROM_MAX_RETRIES; + int i, rv = 0; + int frames = 1; + size_t padded_req_bytes; + size_t copied_bytes = 0; + bool read_cd = false; + + if (!cmd || cmd_len == 0) + return 1; + + if (cmd[0] == 0xBE || cmd[0] == 0xB9) + { + frames = ceil((len + skip) / 2352.0); + padded_req_bytes = 2352 * frames; + read_cd = true; + /* these will be incremented below */ + cmd[6] = cmd[3]; + cmd[7] = cmd[4]; + cmd[8] = cmd[5]; + } + else + { + padded_req_bytes = len + skip; + } + + xfer_buf = (unsigned char*)memalign_alloc(4096, padded_req_bytes); + xfer_buf_pos = xfer_buf; + + if (!xfer_buf) + return 1; + + memset(xfer_buf, 0, padded_req_bytes); +#ifdef CDROM_DEBUG + printf("Number of frames to read: %d\n", frames); + fflush(stdout); +#endif + for (i = 0; i < frames; i++) + { + size_t request_len = padded_req_bytes; + size_t copy_len = request_len; + bool cached_read = false; + + if (read_cd) + { + unsigned lba_req = 0; + + request_len = 2352; + copy_len = request_len; + + increment_msf(&cmd[6], &cmd[7], &cmd[8]); + + if (i > 0) + { + skip = 0; + increment_msf(&cmd[3], &cmd[4], &cmd[5]); + } + else + { + if (skip) + copy_len -= skip; + } + + if (i == frames - 1) + { + copy_len = len - copied_bytes; + } + + lba_req = cdrom_msf_to_lba(cmd[3], cmd[4], cmd[5]); + + if (stream->cdrom.last_frame_valid && lba_req == stream->cdrom.last_frame_lba) + { + /* use cached frame */ + cached_read = true; +#ifdef CDROM_DEBUG + printf("[CDROM] Using cached frame\n"); + fflush(stdout); +#endif + /* assumes request_len is always equal to the size of last_frame */ + memcpy(xfer_buf_pos, stream->cdrom.last_frame, sizeof(stream->cdrom.last_frame)); + } + + } + +#ifdef CDROM_DEBUG + if (!cached_read) + { + unsigned j; + + printf("[CDROM] Send Command: "); + + for (j = 0; j < cmd_len / sizeof(*cmd); j++) + { + printf("%02X ", cmd[j]); + } + + if (len) + printf("(buffer of size %" PRId64 " with skip bytes %" PRId64 " padded to %" PRId64 "), frame %d\n", len, skip, padded_req_bytes, i); + else + printf("\n"); + + fflush(stdout); + } +#endif + +retry: +#if defined(__linux__) && !defined(ANDROID) + if (cached_read || !cdrom_send_command_linux(stream, dir, xfer_buf_pos, request_len, cmd, cmd_len, sense, sizeof(sense))) +#else +#if defined(_WIN32) && !defined(_XBOX) + if (cached_read || !cdrom_send_command_win32(stream, dir, xfer_buf_pos, request_len, cmd, cmd_len, sense, sizeof(sense))) +#endif +#endif + { + rv = 0; + + if (buf) + { +#if 0 + printf("offsetting %" PRId64 " from buf, copying at xfer_buf offset %" PRId64 ", copying %" PRId64 " bytes\n", copied_bytes, (xfer_buf_pos + skip) - xfer_buf, copy_len); + fflush(stdout); +#endif + memcpy((char*)buf + copied_bytes, xfer_buf_pos + skip, copy_len); + copied_bytes += copy_len; + + if (read_cd && !cached_read && request_len >= 2352) + { + unsigned frame_end = cdrom_msf_to_lba(cmd[6], cmd[7], cmd[8]); + + /* cache the last received frame */ + memcpy(stream->cdrom.last_frame, xfer_buf_pos, sizeof(stream->cdrom.last_frame)); + stream->cdrom.last_frame_valid = true; + /* the ending frame is never actually read, so what we really just read is the one right before that */ + stream->cdrom.last_frame_lba = frame_end - 1; + } + else + stream->cdrom.last_frame_valid = false; + +#if 0 + printf("Frame %d, adding %" PRId64 " to buf_pos, is now %" PRId64 ". skip is %" PRId64 "\n", i, request_len, (xfer_buf_pos + request_len) - xfer_buf, skip); + fflush(stdout); +#endif + xfer_buf_pos += request_len; + } + } + else + { +#ifdef CDROM_DEBUG + cdrom_print_sense_data(sense, sizeof(sense)); +#endif + + /* INQUIRY/TEST/SENSE should never fail, don't retry. */ + /* READ ATIP seems to fail outright on some drives with pressed discs, skip retries. */ + if (cmd[0] != 0x0 && cmd[0] != 0x12 && cmd[0] != 0x5A && !(cmd[0] == 0x43 && cmd[2] == 0x4)) + { + unsigned char key = sense[2] & 0xF; + + switch (key) + { + case 0: + case 2: + case 3: + case 4: + case 6: + if (retries_left) + { + #ifdef CDROM_DEBUG + printf("[CDROM] Read Retry...\n"); + fflush(stdout); + #endif + retries_left--; + retro_sleep(1000); + goto retry; + } + else + { + rv = 1; + #ifdef CDROM_DEBUG + printf("[CDROM] Read retries failed, giving up.\n"); + fflush(stdout); + #endif + } + + break; + default: + break; + } + } + + rv = 1; + } + } + + if (xfer_buf) + memalign_free(xfer_buf); + + return rv; +} + +static const char* get_profile(unsigned short profile) +{ + switch (profile) + { + case 2: + return "Removable disk"; + break; + case 8: + return "CD-ROM"; + break; + case 9: + return "CD-R"; + break; + case 0xA: + return "CD-RW"; + break; + case 0x10: + return "DVD-ROM"; + break; + case 0x11: + return "DVD-R Sequential Recording"; + break; + case 0x12: + return "DVD-RAM"; + break; + case 0x13: + return "DVD-RW Restricted Overwrite"; + break; + case 0x14: + return "DVD-RW Sequential recording"; + break; + case 0x15: + return "DVD-R Dual Layer Sequential Recording"; + break; + case 0x16: + return "DVD-R Dual Layer Jump Recording"; + break; + case 0x17: + return "DVD-RW Dual Layer"; + break; + case 0x1A: + return "DVD+RW"; + break; + case 0x1B: + return "DVD+R"; + break; + case 0x2A: + return "DVD+RW Dual Layer"; + break; + case 0x2B: + return "DVD+R Dual Layer"; + break; + case 0x40: + return "BD-ROM"; + break; + case 0x41: + return "BD-R SRM"; + break; + case 0x42: + return "BD-R RRM"; + break; + case 0x43: + return "BD-RE"; + break; + case 0x50: + return "HD DVD-ROM"; + break; + case 0x51: + return "HD DVD-R"; + break; + case 0x52: + return "HD DVD-RAM"; + break; + case 0x53: + return "HD DVD-RW"; + break; + case 0x58: + return "HD DVD-R Dual Layer"; + break; + case 0x5A: + return "HD DVD-RW Dual Layer"; + break; + default: + break; + } + + return "Unknown"; +} + +int cdrom_get_sense(libretro_vfs_implementation_file *stream, unsigned char *sense, size_t len) +{ + unsigned char cdb[] = {0x3, 0, 0, 0, 0xFC, 0}; + unsigned char buf[0xFC] = {0}; + int rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb, sizeof(cdb), 0); + +#ifdef CDROM_DEBUG + printf("[CDROM] get sense data status code %d\n", rv); + fflush(stdout); +#endif + + if (rv) + return 1; + +#ifdef CDROM_DEBUG + cdrom_print_sense_data(buf, sizeof(buf)); +#endif + + return 0; +} + +void cdrom_get_current_config_random_readable(libretro_vfs_implementation_file *stream) +{ + unsigned char cdb[] = {0x46, 0x2, 0, 0x10, 0, 0, 0, 0, 0x14, 0}; + unsigned char buf[0x14] = {0}; + int rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb, sizeof(cdb), 0); + int i; + + printf("[CDROM] get current config random readable status code %d\n", rv); + + if (rv) + return; + + printf("[CDROM] Feature Header: "); + + for (i = 0; i < 8; i++) + { + printf("%02X ", buf[i]); + } + + printf("\n"); + + printf("[CDROM] Random Readable Feature Descriptor: "); + + for (i = 0; i < 12; i++) + { + printf("%02X ", buf[8 + i]); + } + + printf("\n"); + + printf("[CDROM] Supported commands: READ CAPACITY, READ (10)\n"); +} + +void cdrom_get_current_config_multiread(libretro_vfs_implementation_file *stream) +{ + unsigned char cdb[] = {0x46, 0x2, 0, 0x1D, 0, 0, 0, 0, 0xC, 0}; + unsigned char buf[0xC] = {0}; + int rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb, sizeof(cdb), 0); + int i; + + printf("[CDROM] get current config multi-read status code %d\n", rv); + + if (rv) + return; + + printf("[CDROM] Feature Header: "); + + for (i = 0; i < 8; i++) + { + printf("%02X ", buf[i]); + } + + printf("\n"); + + printf("[CDROM] Multi-Read Feature Descriptor: "); + + for (i = 0; i < 4; i++) + { + printf("%02X ", buf[8 + i]); + } + + printf("\n"); + + printf("[CDROM] Supported commands: READ (10), READ CD, READ DISC INFORMATION, READ TRACK INFORMATION\n"); +} + +void cdrom_get_current_config_cdread(libretro_vfs_implementation_file *stream) +{ + unsigned char cdb[] = {0x46, 0x2, 0, 0x1E, 0, 0, 0, 0, 0x10, 0}; + unsigned char buf[0x10] = {0}; + int rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb, sizeof(cdb), 0); + int i; + + printf("[CDROM] get current config cd read status code %d\n", rv); + + if (rv) + return; + + printf("[CDROM] Feature Header: "); + + for (i = 0; i < 8; i++) + { + printf("%02X ", buf[i]); + } + + printf("\n"); + + printf("[CDROM] CD Read Feature Descriptor: "); + + for (i = 0; i < 8; i++) + { + printf("%02X ", buf[8 + i]); + } + + if (buf[8 + 2] & 1) + printf("(current)\n"); + + printf("[CDROM] Supported commands: READ CD, READ CD MSF, READ TOC/PMA/ATIP\n"); +} + +void cdrom_get_current_config_profiles(libretro_vfs_implementation_file *stream) +{ + unsigned char cdb[] = {0x46, 0x2, 0, 0x0, 0, 0, 0, 0xFF, 0xFA, 0}; + unsigned char buf[0xFFFA] = {0}; + int rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb, sizeof(cdb), 0); + int i; + + printf("[CDROM] get current config profiles status code %d\n", rv); + + if (rv) + return; + + printf("[CDROM] Feature Header: "); + + for (i = 0; i < 8; i++) + { + printf("%02X ", buf[i]); + } + + printf("\n"); + + printf("[CDROM] Profile List Descriptor: "); + + for (i = 0; i < 4; i++) + { + printf("%02X ", buf[8 + i]); + } + + printf("\n"); + + printf("[CDROM] Number of profiles: %u\n", buf[8 + 3] / 4); + + for (i = 0; i < buf[8 + 3] / 4; i++) + { + unsigned short profile = (buf[8 + (4 * (i + 1))] << 8) | buf[8 + (4 * (i + 1)) + 1]; + + printf("[CDROM] Profile Number: %04X (%s) ", profile, get_profile(profile)); + + if (buf[8 + (4 * (i + 1)) + 2] & 1) + printf("(current)\n"); + else + printf("\n"); + } +} + +void cdrom_get_current_config_core(libretro_vfs_implementation_file *stream) +{ + unsigned char cdb[] = {0x46, 0x2, 0, 0x1, 0, 0, 0, 0, 0x14, 0}; + unsigned char buf[20] = {0}; + unsigned intf_std = 0; + int rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb, sizeof(cdb), 0); + int i; + const char *intf_std_name = "Unknown"; + + printf("[CDROM] get current config core status code %d\n", rv); + + if (rv) + return; + + printf("[CDROM] Feature Header: "); + + for (i = 0; i < 8; i++) + { + printf("%02X ", buf[i]); + } + + printf("\n"); + + if (buf[6] == 0 && buf[7] == 8) + printf("[CDROM] Current Profile: CD-ROM\n"); + else + printf("[CDROM] Current Profile: %02X%02X\n", buf[6], buf[7]); + + printf("[CDROM] Core Feature Descriptor: "); + + for (i = 0; i < 12; i++) + { + printf("%02X ", buf[8 + i]); + } + + printf("\n"); + + intf_std = buf[8 + 4] << 24 | buf[8 + 5] << 16 | buf[8 + 6] << 8 | buf[8 + 7]; + + switch (intf_std) + { + case 0: + intf_std_name = "Unspecified"; + break; + case 1: + intf_std_name = "SCSI Family"; + break; + case 2: + intf_std_name = "ATAPI"; + break; + case 7: + intf_std_name = "Serial ATAPI"; + break; + case 8: + intf_std_name = "USB"; + break; + default: + break; + } + + printf("[CDROM] Physical Interface Standard: %u (%s)\n", intf_std, intf_std_name); +} + +int cdrom_read_subq(libretro_vfs_implementation_file *stream, unsigned char *buf, size_t len) +{ + /* MMC Command: READ TOC/PMA/ATIP */ + unsigned char cdb[] = {0x43, 0x2, 0x2, 0, 0, 0, 0x1, 0x9, 0x30, 0}; +#ifdef CDROM_DEBUG + unsigned short data_len = 0; + unsigned char first_session = 0; + unsigned char last_session = 0; + int i; +#endif + int rv; + + if (!buf) + return 1; + + rv = cdrom_send_command(stream, DIRECTION_IN, buf, len, cdb, sizeof(cdb), 0); + + if (rv) + return 1; + +#ifdef CDROM_DEBUG + data_len = buf[0] << 8 | buf[1]; + first_session = buf[2]; + last_session = buf[3]; + + printf("[CDROM] Data Length: %d\n", data_len); + printf("[CDROM] First Session: %d\n", first_session); + printf("[CDROM] Last Session: %d\n", last_session); + + for (i = 0; i < (data_len - 2) / 11; i++) + { + unsigned char session_num = buf[4 + (i * 11) + 0]; + unsigned char adr = (buf[4 + (i * 11) + 1] >> 4) & 0xF; + /*unsigned char control = buf[4 + (i * 11) + 1] & 0xF;*/ + unsigned char tno = buf[4 + (i * 11) + 2]; + unsigned char point = buf[4 + (i * 11) + 3]; + unsigned char pmin = buf[4 + (i * 11) + 8]; + unsigned char psec = buf[4 + (i * 11) + 9]; + unsigned char pframe = buf[4 + (i * 11) + 10]; + + /*printf("i %d control %d adr %d tno %d point %d: ", i, control, adr, tno, point);*/ + /* why is control always 0? */ + + if (/*(control == 4 || control == 6) && */adr == 1 && tno == 0 && point >= 1 && point <= 99) + { + printf("[CDROM] - Session#: %d TNO %d POINT %d ", session_num, tno, point); + printf("Track start time: (aMSF %02u:%02u:%02u) ", (unsigned)pmin, (unsigned)psec, (unsigned)pframe); + } + else if (/*(control == 4 || control == 6) && */adr == 1 && tno == 0 && point == 0xA0) + { + printf("[CDROM] - Session#: %d TNO %d POINT %d ", session_num, tno, point); + printf("First Track Number: %d ", pmin); + printf("Disc Type: %d ", psec); + } + else if (/*(control == 4 || control == 6) && */adr == 1 && tno == 0 && point == 0xA1) + { + printf("[CDROM] - Session#: %d TNO %d POINT %d ", session_num, tno, point); + printf("Last Track Number: %d ", pmin); + } + else if (/*(control == 4 || control == 6) && */adr == 1 && tno == 0 && point == 0xA2) + { + printf("[CDROM] - Session#: %d TNO %d POINT %d ", session_num, tno, point); + printf("Lead-out start time: (aMSF %02u:%02u:%02u) ", (unsigned)pmin, (unsigned)psec, (unsigned)pframe); + } + + printf("\n"); + } + + fflush(stdout); +#endif + return 0; +} + +static int cdrom_read_track_info(libretro_vfs_implementation_file *stream, unsigned char track, cdrom_toc_t *toc) +{ + /* MMC Command: READ TRACK INFORMATION */ + unsigned char cdb[] = {0x52, 0x1, 0, 0, 0, 0, 0, 0x1, 0x80, 0}; + unsigned char buf[384] = {0}; + unsigned lba = 0; + unsigned track_size = 0; + int rv; + ssize_t pregap_lba_len; + + cdb[5] = track; + + rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb, sizeof(cdb), 0); + + if (rv) + return 1; + + memcpy(&lba, buf + 8, 4); + memcpy(&track_size, buf + 24, 4); + + lba = swap_if_little32(lba); + track_size = swap_if_little32(track_size); + + /* lba_start may be earlier than the MSF start times seen in read_subq */ + toc->track[track - 1].lba_start = lba; + toc->track[track - 1].track_size = track_size; + + pregap_lba_len = (toc->track[track - 1].audio ? 0 : (toc->track[track - 1].lba - toc->track[track - 1].lba_start)); + + toc->track[track - 1].track_bytes = (track_size - pregap_lba_len) * 2352; + toc->track[track - 1].mode = buf[6] & 0xF; + +#ifdef CDROM_DEBUG + printf("[CDROM] Track %d Info: ", track); + printf("Copy: %d ", (buf[5] & 0x10) > 0); + printf("Data Mode: %d ", toc->track[track - 1].mode); + printf("LBA Start: %d (%d) ", lba, toc->track[track - 1].lba); + printf("Track Size: %d\n", track_size); + fflush(stdout); +#endif + + return 0; +} + +int cdrom_set_read_speed(libretro_vfs_implementation_file *stream, unsigned speed) +{ + /* MMC Command: SET CD SPEED */ + unsigned char cmd[] = {0xBB, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + cmd[2] = (speed >> 24) & 0xFF; + cmd[3] = (speed >> 16) & 0xFF; + cmd[4] = (speed >> 8) & 0xFF; + cmd[5] = speed & 0xFF; + + return cdrom_send_command(stream, DIRECTION_NONE, NULL, 0, cmd, sizeof(cmd), 0); +} + +int cdrom_write_cue(libretro_vfs_implementation_file *stream, char **out_buf, size_t *out_len, char cdrom_drive, unsigned char *num_tracks, cdrom_toc_t *toc) +{ + unsigned char buf[2352] = {0}; + unsigned short data_len = 0; + size_t len = 0; + size_t pos = 0; + int rv = 0; + int i; + + if (!out_buf || !out_len || !num_tracks || !toc) + { +#ifdef CDROM_DEBUG + printf("[CDROM] Invalid buffer/length pointer for CDROM cue sheet\n"); + fflush(stdout); +#endif + return 1; + } + + cdrom_set_read_speed(stream, 0xFFFFFFFF); + + rv = cdrom_read_subq(stream, buf, sizeof(buf)); + + if (rv) + return rv; + + data_len = buf[0] << 8 | buf[1]; + + for (i = 0; i < (data_len - 2) / 11; i++) + { + unsigned char adr = (buf[4 + (i * 11) + 1] >> 4) & 0xF; + unsigned char tno = buf[4 + (i * 11) + 2]; + unsigned char point = buf[4 + (i * 11) + 3]; + unsigned char pmin = buf[4 + (i * 11) + 8]; + + if (/*(control == 4 || control == 6) && */adr == 1 && tno == 0 && point == 0xA1) + { + *num_tracks = pmin; +#ifdef CDROM_DEBUG + printf("[CDROM] Number of CDROM tracks: %d\n", *num_tracks); + fflush(stdout); +#endif + break; + } + } + + if (!*num_tracks || *num_tracks > 99) + { +#ifdef CDROM_DEBUG + printf("[CDROM] Invalid number of CDROM tracks: %d\n", *num_tracks); + fflush(stdout); +#endif + return 1; + } + + len = CDROM_CUE_TRACK_BYTES * (*num_tracks); + toc->num_tracks = *num_tracks; + *out_buf = (char*)calloc(1, len); + *out_len = len; + + for (i = 0; i < (data_len - 2) / 11; i++) + { + /*unsigned char session_num = buf[4 + (i * 11) + 0];*/ + unsigned char adr = (buf[4 + (i * 11) + 1] >> 4) & 0xF; + unsigned char control = buf[4 + (i * 11) + 1] & 0xF; + unsigned char tno = buf[4 + (i * 11) + 2]; + unsigned char point = buf[4 + (i * 11) + 3]; + /*unsigned char amin = buf[4 + (i * 11) + 4]; + unsigned char asec = buf[4 + (i * 11) + 5]; + unsigned char aframe = buf[4 + (i * 11) + 6];*/ + unsigned char pmin = buf[4 + (i * 11) + 8]; + unsigned char psec = buf[4 + (i * 11) + 9]; + unsigned char pframe = buf[4 + (i * 11) + 10]; + unsigned lba = cdrom_msf_to_lba(pmin, psec, pframe); + + /*printf("i %d control %d adr %d tno %d point %d: amin %d asec %d aframe %d pmin %d psec %d pframe %d\n", i, control, adr, tno, point, amin, asec, aframe, pmin, psec, pframe);*/ + /* why is control always 0? */ + + if (/*(control == 4 || control == 6) && */adr == 1 && tno == 0 && point >= 1 && point <= 99) + { + bool audio = false; + const char *track_type = "MODE1/2352"; + + audio = (!(control & 0x4) && !(control & 0x5)); + +#ifdef CDROM_DEBUG + printf("[CDROM] Track %02d CONTROL %01X ADR %01X AUDIO? %d\n", point, control, adr, audio); + fflush(stdout); +#endif + + toc->track[point - 1].track_num = point; + toc->track[point - 1].min = pmin; + toc->track[point - 1].sec = psec; + toc->track[point - 1].frame = pframe; + toc->track[point - 1].lba = lba; + toc->track[point - 1].audio = audio; + + cdrom_read_track_info(stream, point, toc); + + if (audio) + track_type = "AUDIO"; + else if (toc->track[point - 1].mode == 1) + track_type = "MODE1/2352"; + else if (toc->track[point - 1].mode == 2) + track_type = "MODE2/2352"; + +#if defined(_WIN32) && !defined(_XBOX) + pos += snprintf(*out_buf + pos, len - pos, "FILE \"cdrom://%c:/drive-track%02d.bin\" BINARY\n", cdrom_drive, point); +#else + pos += snprintf(*out_buf + pos, len - pos, "FILE \"cdrom://drive%c-track%02d.bin\" BINARY\n", cdrom_drive, point); +#endif + pos += snprintf(*out_buf + pos, len - pos, " TRACK %02d %s\n", point, track_type); + + { + unsigned pregap_lba_len = toc->track[point - 1].lba - toc->track[point - 1].lba_start; + + if (toc->track[point - 1].audio && pregap_lba_len > 0) + { + unsigned char min = 0; + unsigned char sec = 0; + unsigned char frame = 0; + + cdrom_lba_to_msf(pregap_lba_len, &min, &sec, &frame); + + pos += snprintf(*out_buf + pos, len - pos, " INDEX 00 00:00:00\n"); + pos += snprintf(*out_buf + pos, len - pos, " INDEX 01 %02u:%02u:%02u\n", (unsigned)min, (unsigned)sec, (unsigned)frame); + } + else + pos += snprintf(*out_buf + pos, len - pos, " INDEX 01 00:00:00\n"); + } + } + } + + return 0; +} + +/* needs 32 bytes for full vendor, product and version */ +int cdrom_get_inquiry(libretro_vfs_implementation_file *stream, char *model, int len, bool *is_cdrom) +{ + /* MMC Command: INQUIRY */ + unsigned char cdb[] = {0x12, 0, 0, 0, 0xff, 0}; + unsigned char buf[256] = {0}; + int rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb, sizeof(cdb), 0); + bool cdrom = false; + + if (rv) + return 1; + + if (model && len >= 32) + { + memset(model, 0, len); + + /* vendor */ + memcpy(model, buf + 8, 8); + + model[8] = ' '; + + /* product */ + memcpy(model + 9, buf + 16, 16); + + model[25] = ' '; + + /* version */ + memcpy(model + 26, buf + 32, 4); + } + + cdrom = (buf[0] == 5); + + if (is_cdrom && cdrom) + *is_cdrom = true; + +#ifdef CDROM_DEBUG + printf("[CDROM] Device Model: %s (is CD-ROM? %s)\n", model, (cdrom ? "yes" : "no")); +#endif + return 0; +} + +int cdrom_read(libretro_vfs_implementation_file *stream, cdrom_group_timeouts_t *timeouts, unsigned char min, unsigned char sec, unsigned char frame, void *s, size_t len, size_t skip) +{ + /* MMC Command: READ CD MSF */ + unsigned char cdb[] = {0xB9, 0, 0, 0, 0, 0, 0, 0, 0, 0xF8, 0, 0}; + int rv; + double frames = ceil((len + skip) / 2352.0); + unsigned frame_end = cdrom_msf_to_lba(min, sec, frame) + frames; + + cdb[3] = min; + cdb[4] = sec; + cdb[5] = frame; + + if (frames <= 1) + { + cdrom_lba_to_msf(frame_end, &cdb[6], &cdb[7], &cdb[8]); +#ifdef CDROM_DEBUG + printf("[CDROM] single-frame read: %d %d %d skip %" PRId64 "\n", cdb[3], cdb[4], cdb[5], skip); + fflush(stdout); +#endif + } + else + { + cdrom_lba_to_msf(frame_end, &cdb[6], &cdb[7], &cdb[8]); + +#ifdef CDROM_DEBUG + printf("[CDROM] multi-frame read: %d sectors starting from %02d:%02d:%02d skip %" PRId64 "\n", (int)frames, cdb[3], cdb[4], cdb[5], skip); + fflush(stdout); +#endif + } + + /* regardless of the length specified here, a new buffer will be allocated and padded to a sector multiple inside cdrom_send_command */ + rv = cdrom_send_command(stream, DIRECTION_IN, s, len, cdb, sizeof(cdb), skip); + +#ifdef CDROM_DEBUG + printf("[CDROM] read msf status code %d\n", rv); + fflush(stdout); +#endif + + if (rv) + { + stream->cdrom.last_frame_valid = false; + return 1; + } + + return 0; +} + +int cdrom_stop(libretro_vfs_implementation_file *stream) +{ + /* MMC Command: START STOP UNIT */ + unsigned char cdb[] = {0x1B, 0, 0, 0, 0x0, 0}; + int rv = cdrom_send_command(stream, DIRECTION_NONE, NULL, 0, cdb, sizeof(cdb), 0); + +#ifdef CDROM_DEBUG + printf("[CDROM] stop status code %d\n", rv); + fflush(stdout); +#endif + + if (rv) + return 1; + + return 0; +} + +int cdrom_unlock(libretro_vfs_implementation_file *stream) +{ + /* MMC Command: PREVENT ALLOW MEDIUM REMOVAL */ + unsigned char cdb[] = {0x1E, 0, 0, 0, 0x2, 0}; + int rv = cdrom_send_command(stream, DIRECTION_NONE, NULL, 0, cdb, sizeof(cdb), 0); + +#ifdef CDROM_DEBUG + printf("[CDROM] persistent prevent clear status code %d\n", rv); + fflush(stdout); +#endif + + if (rv) + return 1; + + cdb[4] = 0x0; + + rv = cdrom_send_command(stream, DIRECTION_NONE, NULL, 0, cdb, sizeof(cdb), 0); + +#ifdef CDROM_DEBUG + printf("[CDROM] prevent clear status code %d\n", rv); + fflush(stdout); +#endif + + if (rv) + return 1; + + return 0; +} + +int cdrom_open_tray(libretro_vfs_implementation_file *stream) +{ + /* MMC Command: START STOP UNIT */ + unsigned char cdb[] = {0x1B, 0, 0, 0, 0x2, 0}; + int rv; + + cdrom_unlock(stream); + cdrom_stop(stream); + + rv = cdrom_send_command(stream, DIRECTION_NONE, NULL, 0, cdb, sizeof(cdb), 0); + +#ifdef CDROM_DEBUG + printf("[CDROM] open tray status code %d\n", rv); + fflush(stdout); +#endif + + if (rv) + return 1; + + return 0; +} + +int cdrom_close_tray(libretro_vfs_implementation_file *stream) +{ + /* MMC Command: START STOP UNIT */ + unsigned char cdb[] = {0x1B, 0, 0, 0, 0x3, 0}; + int rv = cdrom_send_command(stream, DIRECTION_NONE, NULL, 0, cdb, sizeof(cdb), 0); + +#ifdef CDROM_DEBUG + printf("[CDROM] close tray status code %d\n", rv); + fflush(stdout); +#endif + + if (rv) + return 1; + + return 0; +} + +struct string_list* cdrom_get_available_drives(void) +{ + struct string_list *list = string_list_new(); +#if defined(__linux__) && !defined(ANDROID) + struct string_list *dir_list = dir_list_new("/dev", NULL, false, false, false, false); + int i; + bool found = false; + + if (!dir_list) + return list; + + for (i = 0; i < (int)dir_list->size; i++) + { + if (string_starts_with_size(dir_list->elems[i].data, "/dev/sg", + STRLEN_CONST("/dev/sg"))) + { + libretro_vfs_implementation_file *stream; + char drive_model[32] = {0}; + char drive_string[33] = {0}; + union string_list_elem_attr attr = {0}; + int dev_index = 0; + RFILE *file = filestream_open( + dir_list->elems[i].data, RETRO_VFS_FILE_ACCESS_READ, 0); + bool is_cdrom = false; + + found = true; + + if (!file) + { +#ifdef CDROM_DEBUG + printf("[CDROM] Could not open %s, please check permissions.\n", dir_list->elems[i].data); + fflush(stdout); +#endif + continue; + } + + stream = filestream_get_vfs_handle(file); + cdrom_get_inquiry(stream, drive_model, sizeof(drive_model), &is_cdrom); + filestream_close(file); + + if (!is_cdrom) + continue; + + sscanf(dir_list->elems[i].data + STRLEN_CONST("/dev/sg"), + "%d", &dev_index); + + dev_index = '0' + dev_index; + attr.i = dev_index; + + if (!string_is_empty(drive_model)) + strlcat(drive_string, drive_model, sizeof(drive_string)); + else + strlcat(drive_string, "Unknown Drive", sizeof(drive_string)); + + string_list_append(list, drive_string, attr); + } + } + + if (!found) + { + char *buf = NULL; + int64_t len = 0; + + if (filestream_read_file("/proc/modules", (void**)&buf, &len)) + { +#ifdef CDROM_DEBUG + bool found = false; +#endif + struct string_list mods = {0}; + + string_list_initialize(&mods); + + if (string_split_noalloc(&mods, buf, "\n")) + { + for (i = 0; i < mods.size; i++) + { + if (strcasestr(mods.elems[i].data, "sg ")) + { +#ifdef CDROM_DEBUG + found = true; +#endif + break; + } + } + } + string_list_deinitialize(&mods); + free(buf); + +#ifdef CDROM_DEBUG + if (found) + { + printf("[CDROM] No sg devices found but kernel module is loaded.\n"); + fflush(stdout); + } + else + { + printf("[CDROM] No sg devices found and sg kernel module is not loaded.\n"); + fflush(stdout); + } +#endif + } +#ifdef CDROM_DEBUG + else + { + printf("[CDROM] No sg devices found, could not check if sg kernel module is loaded.\n"); + fflush(stdout); + } +#endif + } + + string_list_free(dir_list); +#endif +#if defined(_WIN32) && !defined(_XBOX) + DWORD drive_mask = GetLogicalDrives(); + int i; + + for (i = 0; i < sizeof(DWORD) * 8; i++) + { + char path[] = {"a:\\"}; + char cdrom_path[] = {"cdrom://a:/drive-track01.bin"}; + + path[0] += i; + cdrom_path[8] += i; + + /* this drive letter doesn't exist */ + if (!(drive_mask & (1 << i))) + continue; + + if (GetDriveType(path) != DRIVE_CDROM) + continue; + else + { + char drive_model[32] = {0}; + char drive_string[33] = {0}; + union string_list_elem_attr attr = {0}; + RFILE *file = filestream_open(cdrom_path, RETRO_VFS_FILE_ACCESS_READ, 0); + libretro_vfs_implementation_file *stream; + bool is_cdrom = false; + + if (!file) + continue; + + stream = filestream_get_vfs_handle(file); + cdrom_get_inquiry(stream, drive_model, sizeof(drive_model), &is_cdrom); + filestream_close(file); + + if (!is_cdrom) + continue; + + attr.i = path[0]; + + if (!string_is_empty(drive_model)) + strlcat(drive_string, drive_model, sizeof(drive_string)); + else + strlcat(drive_string, "Unknown Drive", sizeof(drive_string)); + + string_list_append(list, drive_string, attr); + } + } +#endif + return list; +} + +bool cdrom_is_media_inserted(libretro_vfs_implementation_file *stream) +{ + /* MMC Command: TEST UNIT READY */ + unsigned char cdb[] = {0x00, 0, 0, 0, 0, 0}; + int rv = cdrom_send_command(stream, DIRECTION_NONE, NULL, 0, cdb, sizeof(cdb), 0); + +#ifdef CDROM_DEBUG + printf("[CDROM] media inserted status code %d\n", rv); + fflush(stdout); +#endif + + /* Will also return false if the drive is simply not ready yet (tray open, disc spinning back up after tray closed etc). + * Command will not block or wait for media to become ready. */ + if (rv) + return false; + + return true; +} + +bool cdrom_drive_has_media(const char drive) +{ + RFILE *file; + char cdrom_path_bin[256] = {0}; + + cdrom_device_fillpath(cdrom_path_bin, sizeof(cdrom_path_bin), drive, 1, false); + + file = filestream_open(cdrom_path_bin, RETRO_VFS_FILE_ACCESS_READ, 0); + + if (file) + { + libretro_vfs_implementation_file *stream = filestream_get_vfs_handle(file); + bool has_media = false; + + has_media = cdrom_is_media_inserted(stream); + + filestream_close(file); + + return has_media; + } + + return false; +} + +bool cdrom_set_read_cache(libretro_vfs_implementation_file *stream, bool enabled) +{ + /* MMC Command: MODE SENSE (10) and MODE SELECT (10) */ + unsigned char cdb_sense_changeable[] = {0x5A, 0, 0x48, 0, 0, 0, 0, 0, 0x14, 0}; + unsigned char cdb_sense[] = {0x5A, 0, 0x8, 0, 0, 0, 0, 0, 0x14, 0}; + unsigned char cdb_select[] = {0x55, 0x10, 0, 0, 0, 0, 0, 0, 0x14, 0}; + unsigned char buf[20] = {0}; + int rv, i; + + rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb_sense_changeable, sizeof(cdb_sense_changeable), 0); + +#ifdef CDROM_DEBUG + printf("[CDROM] mode sense changeable status code %d\n", rv); + fflush(stdout); +#endif + + if (rv) + return false; + + if (!(buf[10] & 0x1)) + { + /* RCD (read cache disable) bit is not changeable */ +#ifdef CDROM_DEBUG + printf("[CDROM] RCD (read cache disable) bit is not changeable.\n"); + fflush(stdout); +#endif + return false; + } + + memset(buf, 0, sizeof(buf)); + + rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb_sense, sizeof(cdb_sense), 0); + +#ifdef CDROM_DEBUG + printf("mode sense status code %d\n", rv); + fflush(stdout); +#endif + + if (rv) + return false; + +#ifdef CDROM_DEBUG + printf("Mode sense data for caching mode page: "); + + for (i = 0; i < (int)sizeof(buf); i++) + { + printf("%02X ", buf[i]); + } + + printf("\n"); + fflush(stdout); +#endif + + /* "When transferred during execution of the MODE SELECT (10) command, Mode Data Length is reserved." */ + for (i = 0; i < 8; i++) + buf[i] = 0; + + if (enabled) + buf[10] &= ~1; + else + buf[10] |= 1; + + rv = cdrom_send_command(stream, DIRECTION_OUT, buf, sizeof(buf), cdb_select, sizeof(cdb_select), 0); + +#ifdef CDROM_DEBUG + printf("mode select status code %d\n", rv); + fflush(stdout); +#endif + + if (rv) + return false; + + return true; +} + +bool cdrom_get_timeouts(libretro_vfs_implementation_file *stream, cdrom_group_timeouts_t *timeouts) +{ + /* MMC Command: MODE SENSE (10) */ + int rv; + unsigned char cdb[] = {0x5A, 0, 0x1D, 0, 0, 0, 0, 0, 0x14, 0}; + unsigned char buf[20] = {0}; + unsigned short g1 = 0; + unsigned short g2 = 0; + unsigned short g3 = 0; + + if (!timeouts) + return false; + + rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb, sizeof(cdb), 0); + +#ifdef CDROM_DEBUG + printf("get timeouts status code %d\n", rv); + fflush(stdout); +#endif + + if (rv) + return false; + + g1 = buf[14] << 8 | buf[15]; + g2 = buf[16] << 8 | buf[17]; + g3 = buf[18] << 8 | buf[19]; + +#ifdef CDROM_DEBUG + { + int i; + + printf("Mode sense data for timeout groups: "); + + for (i = 0; i < (int)sizeof(buf); i++) + { + printf("%02X ", buf[i]); + } + + printf("\n"); + + printf("Group 1 Timeout: %d\n", g1); + printf("Group 2 Timeout: %d\n", g2); + printf("Group 3 Timeout: %d\n", g3); + + fflush(stdout); + } +#endif + + timeouts->g1_timeout = g1; + timeouts->g2_timeout = g2; + timeouts->g3_timeout = g3; + + return true; +} + +bool cdrom_has_atip(libretro_vfs_implementation_file *stream) +{ + /* MMC Command: READ TOC/PMA/ATIP */ + unsigned char cdb[] = {0x43, 0x2, 0x4, 0, 0, 0, 0, 0x9, 0x30, 0}; + unsigned char buf[32] = {0}; + unsigned short atip_len = 0; + int rv = cdrom_send_command(stream, DIRECTION_IN, buf, sizeof(buf), cdb, sizeof(cdb), 0); + + if (rv) + return false; + + atip_len = buf[0] << 8 | buf[1]; + +#ifdef CDROM_DEBUG + printf("ATIP Length %d, Disc Type %d, Disc Sub-Type %d\n", atip_len, (buf[6] >> 6) & 0x1, ((buf[6] >> 5) & 0x1) << 2 | ((buf[6] >> 4) & 0x1) << 1 | ((buf[6] >> 3) & 0x1) << 0); +#endif + + if (atip_len < 5) + return false; + + return true; +} + +void cdrom_device_fillpath(char *path, size_t len, char drive, unsigned char track, bool is_cue) +{ + size_t pos = 0; + + if (!path || len == 0) + return; + + if (is_cue) + { +#ifdef _WIN32 + pos = strlcpy(path, "cdrom://", len); + + if (len > pos) + path[pos++] = drive; + + pos = strlcat(path, ":/drive.cue", len); +#else +#ifdef __linux__ + pos = strlcpy(path, "cdrom://drive", len); + + if (len > pos + 1) + { + path[pos++] = drive; + path[pos] = '\0'; + } + + pos = strlcat(path, ".cue", len); +#endif +#endif + } + else + { +#ifdef _WIN32 + pos = strlcpy(path, "cdrom://", len); + + if (len > pos + 1) + { + path[pos++] = drive; + path[pos] = '\0'; + } + + pos += snprintf(path + pos, len - pos, ":/drive-track%02d.bin", track); +#else +#ifdef __linux__ + pos = strlcpy(path, "cdrom://drive", len); + + if (len > pos) + path[pos++] = drive; + + pos += snprintf(path + pos, len - pos, "-track%02d.bin", track); +#endif +#endif + } +} diff --git a/cores/saturn/libretro-common/compat/compat_posix_string.c b/cores/saturn/libretro-common/compat/compat_posix_string.c new file mode 100644 index 000000000..6a2f07ee4 --- /dev/null +++ b/cores/saturn/libretro-common/compat/compat_posix_string.c @@ -0,0 +1,104 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (compat_posix_string.c). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +#include + +#ifdef _WIN32 + +#undef strcasecmp +#undef strdup +#undef isblank +#undef strtok_r +#include +#include +#include +#include + +#include + +int retro_strcasecmp__(const char *a, const char *b) +{ + while (*a && *b) + { + int a_ = tolower(*a); + int b_ = tolower(*b); + + if (a_ != b_) + return a_ - b_; + + a++; + b++; + } + + return tolower(*a) - tolower(*b); +} + +char *retro_strdup__(const char *orig) +{ + size_t len = strlen(orig) + 1; + char *ret = (char*)malloc(len); + if (!ret) + return NULL; + + strlcpy(ret, orig, len); + return ret; +} + +int retro_isblank__(int c) +{ + return (c == ' ') || (c == '\t'); +} + +char *retro_strtok_r__(char *str, const char *delim, char **saveptr) +{ + char *first = NULL; + if (!saveptr || !delim) + return NULL; + + if (str) + *saveptr = str; + + do + { + char *ptr = NULL; + first = *saveptr; + while (*first && strchr(delim, *first)) + *first++ = '\0'; + + if (*first == '\0') + return NULL; + + ptr = first + 1; + + while (*ptr && !strchr(delim, *ptr)) + ptr++; + + *saveptr = ptr + (*ptr ? 1 : 0); + *ptr = '\0'; + } while (strlen(first) == 0); + + return first; +} + +#endif diff --git a/cores/saturn/libretro-common/compat/compat_strcasestr.c b/cores/saturn/libretro-common/compat/compat_strcasestr.c new file mode 100644 index 000000000..4129dab29 --- /dev/null +++ b/cores/saturn/libretro-common/compat/compat_strcasestr.c @@ -0,0 +1,58 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (compat_strcasestr.c). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +#include + +/* Pretty much strncasecmp. */ +static int casencmp(const char *a, const char *b, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + { + int a_lower = tolower(a[i]); + int b_lower = tolower(b[i]); + if (a_lower != b_lower) + return a_lower - b_lower; + } + + return 0; +} + +char *strcasestr_retro__(const char *haystack, const char *needle) +{ + size_t i, search_off; + size_t hay_len = strlen(haystack); + size_t needle_len = strlen(needle); + + if (needle_len > hay_len) + return NULL; + + search_off = hay_len - needle_len; + for (i = 0; i <= search_off; i++) + if (!casencmp(haystack + i, needle, needle_len)) + return (char*)haystack + i; + + return NULL; +} diff --git a/cores/saturn/libretro-common/compat/compat_strl.c b/cores/saturn/libretro-common/compat/compat_strl.c new file mode 100644 index 000000000..3a6392cb1 --- /dev/null +++ b/cores/saturn/libretro-common/compat/compat_strl.c @@ -0,0 +1,62 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (compat_strl.c). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include + +/* Implementation of strlcpy()/strlcat() based on OpenBSD. */ + +#ifndef __MACH__ + +size_t strlcpy(char *dest, const char *source, size_t size) +{ + size_t src_size = 0; + size_t n = size; + + if (n) + while (--n && (*dest++ = *source++)) src_size++; + + if (!n) + { + if (size) *dest = '\0'; + while (*source++) src_size++; + } + + return src_size; +} + +size_t strlcat(char *dest, const char *source, size_t size) +{ + size_t len = strlen(dest); + + dest += len; + + if (len > size) + size = 0; + else + size -= len; + + return len + strlcpy(dest, source, size); +} +#endif diff --git a/cores/saturn/libretro-common/compat/fopen_utf8.c b/cores/saturn/libretro-common/compat/fopen_utf8.c new file mode 100644 index 000000000..384657b5d --- /dev/null +++ b/cores/saturn/libretro-common/compat/fopen_utf8.c @@ -0,0 +1,64 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (fopen_utf8.c). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x0500 || defined(_XBOX) +#ifndef LEGACY_WIN32 +#define LEGACY_WIN32 +#endif +#endif + +#ifdef _WIN32 +#undef fopen + +void *fopen_utf8(const char * filename, const char * mode) +{ +#if defined(LEGACY_WIN32) + char * filename_local = utf8_to_local_string_alloc(filename); + if (filename_local) + { + FILE *ret = fopen(filename_local, mode); + free(filename_local); + return ret; + } +#else + wchar_t * filename_w = utf8_to_utf16_string_alloc(filename); + if (filename_w) + { + FILE *ret = NULL; + wchar_t *mode_w = utf8_to_utf16_string_alloc(mode); + if (mode_w) + { + ret = _wfopen(filename_w, mode_w); + free(mode_w); + } + free(filename_w); + return ret; + } +#endif + return NULL; +} +#endif diff --git a/cores/saturn/libretro-common/encodings/encoding_utf.c b/cores/saturn/libretro-common/encodings/encoding_utf.c new file mode 100644 index 000000000..f9c594de6 --- /dev/null +++ b/cores/saturn/libretro-common/encodings/encoding_utf.c @@ -0,0 +1,532 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (encoding_utf.c). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#if defined(_WIN32) && !defined(_XBOX) +#include +#elif defined(_XBOX) +#include +#endif + +#define UTF8_WALKBYTE(string) (*((*(string))++)) + +static unsigned leading_ones(uint8_t c) +{ + unsigned ones = 0; + while (c & 0x80) + { + ones++; + c <<= 1; + } + + return ones; +} + +/** + * utf8_conv_utf32: + * + * Simple implementation. Assumes the sequence is + * properly synchronized and terminated. + **/ +size_t utf8_conv_utf32(uint32_t *out, size_t out_chars, + const char *in, size_t in_size) +{ + unsigned i; + size_t ret = 0; + while (in_size && out_chars) + { + unsigned extra, shift; + uint32_t c; + uint8_t first = *in++; + unsigned ones = leading_ones(first); + + if (ones > 6 || ones == 1) /* Invalid or desync. */ + break; + + extra = ones ? ones - 1 : ones; + if (1 + extra > in_size) /* Overflow. */ + break; + + shift = (extra - 1) * 6; + c = (first & ((1 << (7 - ones)) - 1)) << (6 * extra); + + for (i = 0; i < extra; i++, in++, shift -= 6) + c |= (*in & 0x3f) << shift; + + *out++ = c; + in_size -= 1 + extra; + out_chars--; + ret++; + } + + return ret; +} + +/** + * utf16_conv_utf8: + * + * Leaf function. + **/ +bool utf16_conv_utf8(uint8_t *out, size_t *out_chars, + const uint16_t *in, size_t in_size) +{ + size_t out_pos = 0; + size_t in_pos = 0; + static const + uint8_t utf8_limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + + for (;;) + { + unsigned num_adds; + uint32_t value; + + if (in_pos == in_size) + { + *out_chars = out_pos; + return true; + } + value = in[in_pos++]; + if (value < 0x80) + { + if (out) + out[out_pos] = (char)value; + out_pos++; + continue; + } + + if (value >= 0xD800 && value < 0xE000) + { + uint32_t c2; + + if (value >= 0xDC00 || in_pos == in_size) + break; + c2 = in[in_pos++]; + if (c2 < 0xDC00 || c2 >= 0xE000) + break; + value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000; + } + + for (num_adds = 1; num_adds < 5; num_adds++) + if (value < (((uint32_t)1) << (num_adds * 5 + 6))) + break; + if (out) + out[out_pos] = (char)(utf8_limits[num_adds - 1] + + (value >> (6 * num_adds))); + out_pos++; + do + { + num_adds--; + if (out) + out[out_pos] = (char)(0x80 + + ((value >> (6 * num_adds)) & 0x3F)); + out_pos++; + }while (num_adds != 0); + } + + *out_chars = out_pos; + return false; +} + +/** + * utf8cpy: + * + * Acts mostly like strlcpy. + * + * Copies the given number of UTF-8 characters, + * but at most @d_len bytes. + * + * Always NULL terminates. Does not copy half a character. + * @s is assumed valid UTF-8. + * Use only if @chars is considerably less than @d_len. + * + * @return Number of bytes. + **/ +size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars) +{ + const uint8_t *sb = (const uint8_t*)s; + const uint8_t *sb_org = sb; + + if (!s) + return 0; + + while (*sb && chars-- > 0) + { + sb++; + while ((*sb & 0xC0) == 0x80) + sb++; + } + + if ((size_t)(sb - sb_org) > d_len-1 /* NUL */) + { + sb = sb_org + d_len-1; + while ((*sb & 0xC0) == 0x80) + sb--; + } + + memcpy(d, sb_org, sb-sb_org); + d[sb-sb_org] = '\0'; + + return sb-sb_org; +} + +/** + * utf8skip: + * + * Leaf function + **/ +const char *utf8skip(const char *str, size_t chars) +{ + const uint8_t *strb = (const uint8_t*)str; + + if (!chars) + return str; + + do + { + strb++; + while ((*strb & 0xC0)==0x80) + strb++; + chars--; + }while (chars); + + return (const char*)strb; +} + +/** + * utf8len: + * + * Leaf function. + **/ +size_t utf8len(const char *string) +{ + size_t ret = 0; + + if (!string) + return 0; + + while (*string) + { + if ((*string & 0xC0) != 0x80) + ret++; + string++; + } + return ret; +} + +/** + * utf8_walk: + * + * Does not validate the input. + * + * Leaf function. + * + * @return Returns garbage if it's not UTF-8. + **/ +uint32_t utf8_walk(const char **string) +{ + uint8_t first = UTF8_WALKBYTE(string); + uint32_t ret = 0; + + if (first < 128) + return first; + + ret = (ret << 6) | (UTF8_WALKBYTE(string) & 0x3F); + if (first >= 0xE0) + { + ret = (ret << 6) | (UTF8_WALKBYTE(string) & 0x3F); + if (first >= 0xF0) + { + ret = (ret << 6) | (UTF8_WALKBYTE(string) & 0x3F); + return ret | (first & 7) << 18; + } + return ret | (first & 15) << 12; + } + + return ret | (first & 31) << 6; +} + +static bool utf16_to_char(uint8_t **utf_data, + size_t *dest_len, const uint16_t *in) +{ + unsigned len = 0; + while (in[len] != '\0') + len++; + utf16_conv_utf8(NULL, dest_len, in, len); + *dest_len += 1; + if ((*utf_data = (uint8_t*)malloc(*dest_len)) != 0) + return utf16_conv_utf8(*utf_data, dest_len, in, len); + return false; +} + +/** + * utf16_to_char_string: + **/ +bool utf16_to_char_string(const uint16_t *in, char *s, size_t len) +{ + size_t dest_len = 0; + uint8_t *utf16_data = NULL; + bool ret = utf16_to_char(&utf16_data, &dest_len, in); + + if (ret) + { + utf16_data[dest_len] = 0; + strlcpy(s, (const char*)utf16_data, len); + } + + free(utf16_data); + utf16_data = NULL; + + return ret; +} + +#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE) +/** + * mb_to_mb_string_alloc: + * + * @return Returned pointer MUST be freed by the caller if non-NULL. + **/ +static char *mb_to_mb_string_alloc(const char *str, + enum CodePage cp_in, enum CodePage cp_out) +{ + wchar_t *path_buf_wide = NULL; + int path_buf_wide_len = MultiByteToWideChar(cp_in, 0, str, -1, NULL, 0); + + /* Windows 95 will return 0 from these functions with + * a UTF8 codepage set without MSLU. + * + * From an unknown MSDN version (others omit this info): + * - CP_UTF8 Windows 98/Me, Windows NT 4.0 and later: + * Translate using UTF-8. When this is set, dwFlags must be zero. + * - Windows 95: Under the Microsoft Layer for Unicode, + * MultiByteToWideChar also supports CP_UTF7 and CP_UTF8. + */ + + if (!path_buf_wide_len) + return strdup(str); + + if ((path_buf_wide = (wchar_t*) + calloc(path_buf_wide_len + sizeof(wchar_t), sizeof(wchar_t)))) + { + MultiByteToWideChar(cp_in, 0, + str, -1, path_buf_wide, path_buf_wide_len); + + if (*path_buf_wide) + { + int path_buf_len = WideCharToMultiByte(cp_out, 0, + path_buf_wide, -1, NULL, 0, NULL, NULL); + + if (path_buf_len) + { + char *path_buf = (char*) + calloc(path_buf_len + sizeof(char), sizeof(char)); + + if (path_buf) + { + WideCharToMultiByte(cp_out, 0, + path_buf_wide, -1, path_buf, + path_buf_len, NULL, NULL); + + free(path_buf_wide); + + if (*path_buf) + return path_buf; + + free(path_buf); + return NULL; + } + } + else + { + free(path_buf_wide); + return strdup(str); + } + } + + free(path_buf_wide); + } + + return NULL; +} +#endif + +/** + * utf8_to_local_string_alloc: + * + * @return Returned pointer MUST be freed by the caller if non-NULL. + **/ +char* utf8_to_local_string_alloc(const char *str) +{ + if (str && *str) +#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE) + return mb_to_mb_string_alloc(str, CODEPAGE_UTF8, CODEPAGE_LOCAL); +#else + return strdup(str); /* Assume string needs no modification if not on Windows */ +#endif + return NULL; +} + +/** + * local_to_utf8_string_alloc: + * + * @return Returned pointer MUST be freed by the caller if non-NULL. + **/ +char *local_to_utf8_string_alloc(const char *str) +{ + if (str && *str) +#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE) + return mb_to_mb_string_alloc(str, CODEPAGE_LOCAL, CODEPAGE_UTF8); +#else + return strdup(str); /* Assume string needs no modification if not on Windows */ +#endif + return NULL; +} + +/** + * utf8_to_utf16_string_alloc: + * + * @return Returned pointer MUST be freed by the caller if non-NULL. + **/ +wchar_t* utf8_to_utf16_string_alloc(const char *str) +{ +#ifdef _WIN32 + int len = 0; +#else + size_t len = 0; +#endif + wchar_t *buf = NULL; + + if (!str || !*str) + return NULL; + +#ifdef _WIN32 + if ((len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0))) + { + if (!(buf = (wchar_t*)calloc(len, sizeof(wchar_t)))) + return NULL; + + if ((MultiByteToWideChar(CP_UTF8, 0, str, -1, buf, len)) < 0) + { + free(buf); + return NULL; + } + } + else + { + /* Fallback to ANSI codepage instead */ + if ((len = MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0))) + { + if (!(buf = (wchar_t*)calloc(len, sizeof(wchar_t)))) + return NULL; + + if ((MultiByteToWideChar(CP_ACP, 0, str, -1, buf, len)) < 0) + { + free(buf); + return NULL; + } + } + } +#else + /* NOTE: For now, assume non-Windows platforms' locale is already UTF-8. */ + if ((len = mbstowcs(NULL, str, 0) + 1)) + { + if (!(buf = (wchar_t*)calloc(len, sizeof(wchar_t)))) + return NULL; + + if ((mbstowcs(buf, str, len)) == (size_t)-1) + { + free(buf); + return NULL; + } + } +#endif + + return buf; +} + +/** + * utf16_to_utf8_string_alloc: + * + * @return Returned pointer MUST be freed by the caller if non-NULL. + **/ +char* utf16_to_utf8_string_alloc(const wchar_t *str) +{ +#ifdef _WIN32 + int len = 0; +#else + size_t len = 0; +#endif + char *buf = NULL; + + if (!str || !*str) + return NULL; + +#ifdef _WIN32 + { + UINT code_page = CP_UTF8; + + /* fallback to ANSI codepage instead */ + if (!(len = WideCharToMultiByte(code_page, + 0, str, -1, NULL, 0, NULL, NULL))) + { + code_page = CP_ACP; + len = WideCharToMultiByte(code_page, + 0, str, -1, NULL, 0, NULL, NULL); + } + + if (!(buf = (char*)calloc(len, sizeof(char)))) + return NULL; + + if (WideCharToMultiByte(code_page, + 0, str, -1, buf, len, NULL, NULL) < 0) + { + free(buf); + return NULL; + } + } +#else + /* NOTE: For now, assume non-Windows platforms' + * locale is already UTF-8. */ + if ((len = wcstombs(NULL, str, 0) + 1)) + { + if (!(buf = (char*)calloc(len, sizeof(char)))) + return NULL; + + if (wcstombs(buf, str, len) == (size_t)-1) + { + free(buf); + return NULL; + } + } +#endif + + return buf; +} diff --git a/cores/saturn/libretro-common/file/file_path.c b/cores/saturn/libretro-common/file/file_path.c new file mode 100644 index 000000000..19b649849 --- /dev/null +++ b/cores/saturn/libretro-common/file/file_path.c @@ -0,0 +1,1426 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (file_path.c). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include