From c1b5f784d185253cd998b158cdff6073c0bb48da Mon Sep 17 00:00:00 2001 From: DJLevel3 Date: Mon, 18 Nov 2024 14:21:32 -0500 Subject: [PATCH] Implement recording to PNG or QOI --- Source/img/qoixx.hpp | 1400 +++++++++++++++++++ Source/visualiser/OutputFragmentShader.glsl | 7 +- Source/visualiser/VisualiserComponent.cpp | 237 +++- Source/visualiser/VisualiserComponent.h | 19 +- 4 files changed, 1611 insertions(+), 52 deletions(-) create mode 100644 Source/img/qoixx.hpp diff --git a/Source/img/qoixx.hpp b/Source/img/qoixx.hpp new file mode 100644 index 0000000..ea24d3e --- /dev/null +++ b/Source/img/qoixx.hpp @@ -0,0 +1,1400 @@ +#ifndef QOIXX_HPP_INCLUDED_ +#define QOIXX_HPP_INCLUDED_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef QOIXX_NO_SIMD +#if defined(__ARM_FEATURE_SVE) +#include +#include +#elif defined(__aarch64__) +#include +#elif defined(__AVX2__) +#include +#endif +#endif + +namespace qoixx{ + +namespace detail{ + +template +requires(sizeof(T) == 1 && !std::same_as) +struct contiguous_puller{ + static constexpr bool is_contiguous = true; + const T* t; + inline std::uint8_t pull()noexcept{ + return static_cast(*t++); + } + inline const std::uint8_t* raw_pointer()noexcept{ + return reinterpret_cast(t); + } + inline void advance(std::size_t n)noexcept{ + t += n; + } +}; + +template +struct default_container_operator; + +template +requires(sizeof(T) == 1) +struct default_container_operator>{ + using target_type = std::vector; + static inline target_type construct(std::size_t size){ + target_type t(size); + return t; + } + struct pusher{ + static constexpr bool is_contiguous = true; + target_type* t; + std::size_t i = 0; + inline void push(std::uint8_t x)noexcept{ + (*t)[i++] = static_cast(x); + } + template + requires std::unsigned_integral && (sizeof(U) != 1) + inline void push(U t)noexcept{ + this->push(static_cast(t)); + } + inline target_type finalize()noexcept{ + t->resize(i); + 
return std::move(*t); + } + inline std::uint8_t* raw_pointer()noexcept{ + return reinterpret_cast(t->data())+i; + } + inline void advance(std::size_t n)noexcept{ + i += n; + } + }; + static constexpr pusher create_pusher(target_type& t)noexcept{ + return {&t}; + } + using puller = contiguous_puller; + static constexpr puller create_puller(const target_type& t)noexcept{ + return {t.data()}; + } + static inline std::size_t size(const target_type& t)noexcept{ + return t.size(); + } + static constexpr bool valid(const target_type& t)noexcept{ + return t.capacity() != 0; + } +}; + +template +requires(sizeof(T) == 1) +struct default_container_operator, std::size_t>>{ + using target_type = std::pair, std::size_t>; + static inline target_type construct(std::size_t size){ + return {typename target_type::first_type{static_cast(::operator new[](size))}, 0}; + } + struct pusher{ + static constexpr bool is_contiguous = true; + target_type* t; + inline void push(std::uint8_t x)noexcept{ + t->first[t->second++] = static_cast(x); + } + template + requires std::unsigned_integral && (sizeof(U) != 1) + inline void push(U t)noexcept{ + this->push(static_cast(t)); + } + inline target_type finalize()noexcept{ + return std::move(*t); + } + inline std::uint8_t* raw_pointer()noexcept{ + return reinterpret_cast(t->first.get())+t->second; + } + inline void advance(std::size_t n)noexcept{ + t->second += n; + } + }; + static constexpr pusher create_pusher(target_type& t)noexcept{ + return {&t}; + } + using puller = contiguous_puller; + static constexpr puller create_puller(const target_type& t)noexcept{ + return {t.first.get()}; + } + static inline std::size_t size(const target_type& t)noexcept{ + return t.second; + } + static constexpr bool valid(const target_type& t)noexcept{ + return t.first != nullptr; + } +}; + +template +requires(sizeof(T) == 1) +struct default_container_operator>{ + using target_type = std::pair; + using puller = contiguous_puller; + static constexpr puller 
create_puller(const target_type& t)noexcept{ + return {t.first}; + } + static inline std::size_t size(const target_type& t)noexcept{ + return t.second; + } + static inline bool valid(const target_type& t)noexcept{ + return t.first != nullptr; + } +}; + +} + +template +struct container_operator : detail::default_container_operator{}; + +class qoi{ + template + static inline void efficient_memcpy(void* dst, const void* src){ + if constexpr(Size == 3){ + std::memcpy(dst, src, 2); + std::memcpy(static_cast(dst)+2, static_cast(src)+2, 1); + } + else + std::memcpy(dst, src, Size); + } + template + static inline void push(T& dst, const void* src){ + if constexpr(T::is_contiguous){ + auto*const ptr = dst.raw_pointer(); + dst.advance(Size); + efficient_memcpy(ptr, src); + } + else{ + const auto* ptr = static_cast(src); + auto size = Size; + while(size --> 0) + dst.push(*ptr++); + } + } + template + static inline void pull(void* dst, T& src){ + if constexpr(T::is_contiguous){ + const auto*const ptr = src.raw_pointer(); + src.advance(Size); + efficient_memcpy(dst, ptr); + } + else{ + auto* ptr = static_cast(dst); + auto size = Size; + while(size --> 0) + *ptr++ = src.pull(); + } + } + enum chunk_tag : std::uint32_t{ + index = 0b0000'0000u, + diff = 0b0100'0000u, + luma = 0b1000'0000u, + run = 0b1100'0000u, + rgb = 0b1111'1110u, + rgba = 0b1111'1111u, + }; + static constexpr std::size_t index_size = 64u; + public: + enum class colorspace : std::uint8_t{ + srgb = 0, + linear = 1, + }; + struct desc{ + std::uint32_t width; + std::uint32_t height; + std::uint8_t channels; + qoi::colorspace colorspace; + constexpr bool operator==(const desc&)const noexcept = default; + }; + struct rgba_t{ + std::uint8_t r, g, b, a; + inline std::uint32_t v()const{ + static_assert(sizeof(rgba_t) == sizeof(std::uint32_t)); + if constexpr(std::endian::native == std::endian::little){ + std::uint32_t x; + std::memcpy(&x, this, sizeof(std::uint32_t)); + return x; + } + else + return std::uint32_t{r} | + 
std::uint32_t{g} << 8 | + std::uint32_t{b} << 16 | + std::uint32_t{a} << 24; + } + inline std::uint_fast32_t hash()const{ + static constexpr std::uint64_t constant = + static_cast(3u) << 56 | + 5u << 16 | + static_cast(7u) << 40 | + 11u; + const auto v = static_cast(this->v()); + return (((v<<32|v)&0xFF00FF0000FF00FF)*constant)>>56; + } + inline bool operator==(const rgba_t& rhs)const{ + return v() == rhs.v(); + } + inline bool operator!=(const rgba_t& rhs)const{ + return v() != rhs.v(); + } + }; + struct rgb_t{ + std::uint8_t r, g, b; + inline std::uint32_t v()const{ + static_assert(sizeof(rgb_t) == 3u); + if constexpr(std::endian::native == std::endian::little){ + std::uint32_t x = 255u << 24u; + efficient_memcpy<3>(&x, this); + return x; + } + else + return std::uint32_t{r} | + std::uint32_t{g} << 8 | + std::uint32_t{b} << 16 | + 255u << 24; + } + inline std::uint_fast32_t hash()const{ + static constexpr std::uint64_t constant = + static_cast(3u) << 56 | + 5u << 16 | + static_cast(7u) << 40 | + 11u; + const auto v = + static_cast(r) | + static_cast(g) << 40 | + static_cast(b) << 16 | + static_cast(0xff) << 56 ; + return (v*constant)>>56; + } + inline bool operator==(const rgb_t& rhs)const{ + return ((this->r^rhs.r)|(this->g^rhs.g)|(this->b^rhs.b)) == 0; + } + }; + static constexpr std::uint32_t magic = + 113u /*q*/ << 24 | 111u /*o*/ << 16 | 105u /*i*/ << 8 | 102u /*f*/ ; + static constexpr std::size_t header_size = + sizeof(magic) + + sizeof(std::declval().width) + + sizeof(std::declval().height) + + sizeof(std::declval().channels) + + sizeof(std::declval().colorspace); + static constexpr std::size_t pixels_max = 400000000u; + static constexpr std::uint8_t padding[8] = {0, 0, 0, 0, 0, 0, 0, 1}; + template + static inline std::uint32_t read_32(Puller& p){ + if constexpr(std::endian::native == std::endian::big && Puller::is_contiguous){ + std::uint32_t x; + pull(&x, p); + return x; + } + else{ + const auto _1 = p.pull(); + const auto _2 = p.pull(); + const auto 
_3 = p.pull(); + const auto _4 = p.pull(); + return static_cast(_1 << 24 | _2 << 16 | _3 << 8 | _4); + } + } + template + static inline void write_32(Pusher& p, std::uint32_t value){ + if constexpr(std::endian::native == std::endian::big && Pusher::is_contiguous) + push(p, value); + else{ + p.push((value & 0xff000000) >> 24); + p.push((value & 0x00ff0000) >> 16); + p.push((value & 0x0000ff00) >> 8); + p.push( value & 0x000000ff ); + } + } + private: + template + using local_rgba_pixel_t = std::conditional_t; + template + static constexpr local_rgba_pixel_t default_pixel()noexcept{ + if constexpr(Alpha) + return {0, 0, 0, 255}; + else + return {}; + } + template + struct local_pixel{ + std::uint8_t rgb = static_cast(chunk_tag::rgb); + local_rgba_pixel_t v; + }; + static_assert(std::has_unique_object_representations_v> and std::has_unique_object_representations_v>); + template + static inline void encode_body(Pusher& p, Puller& pixels, rgba_t (&index)[index_size], std::size_t px_len, local_rgba_pixel_t px_prev = default_pixel(), std::uint8_t prev_hash = static_cast(index_size), std::size_t run = 0){ + local_pixel px; + while(px_len--)[[likely]]{ + pull(&px.v, pixels); + if(px.v.v() == px_prev.v()){ + ++run; + continue; + } + if(run > 0){ + while(run >= 62)[[unlikely]]{ + static constexpr std::uint8_t x = chunk_tag::run | 61; + p.push(x); + run -= 62; + } + if(run > 1){ + p.push(chunk_tag::run | (run-1)); + run = 0; + } + else if(run == 1){ + if(prev_hash == index_size)[[unlikely]] + p.push(chunk_tag::run); + else + p.push(chunk_tag::index | prev_hash); + run = 0; + } + } + + const auto index_pos = px.v.hash() % index_size; + prev_hash = index_pos; + + do{ + if(index[index_pos].v() == px.v.v()){ + p.push(chunk_tag::index | index_pos); + break; + } + efficient_memcpy(index + index_pos, &px.v); + if constexpr(Channels == 3) + index[index_pos].a = 255u; + + if constexpr(Channels == 4) + if(px.v.a != px_prev.a){ + p.push(chunk_tag::rgba); + push<4>(p, &px.v); + break; + } 
+ const auto vg_2 = static_cast(px.v.g) - static_cast(px_prev.g); + if(const std::uint8_t g = vg_2+32; g < 64){ + const auto vr = static_cast(px.v.r) - static_cast(px_prev.r) + 2; + const auto vg = vg_2 + 2; + const auto vb = static_cast(px.v.b) - static_cast(px_prev.b) + 2; + + if(static_cast(vr|vg|vb) < 4){ + p.push(chunk_tag::diff | vr << 4 | vg << 2 | vb); + break; + } + const auto vg_r = vr - vg + 8; + const auto vg_b = vb - vg + 8; + if(static_cast(vg_r|vg_b) < 16){ + p.push(chunk_tag::luma | g); + p.push(vg_r << 4 | vg_b); + } + else + push<4>(p, &px); + } + else + push<4>(p, &px); + }while(false); + efficient_memcpy(&px_prev, &px.v); + } + while(run >= 62)[[unlikely]]{ + static constexpr std::uint8_t x = chunk_tag::run | 61; + p.push(x); + run -= 62; + } + if(run > 0) + p.push(chunk_tag::run | (run-1)); + } +#ifndef QOIXX_NO_SIMD +#if defined(__ARM_FEATURE_SVE) + template + using pixels_type = std::conditional_t; + template + requires (std::same_as, svuint8_t> && ...) + static inline pixels_type create(Args&&... 
args)noexcept{ + if constexpr(sizeof...(Args) == 4) + return svcreate4_u8(std::forward(args)...); + else + return svcreate3_u8(std::forward(args)...); + } + template + static inline svuint8_t get(svuint8x4_t t)noexcept{ + return svget4_u8(t, ImmIndex); + } + template + static inline svuint8_t get(svuint8x3_t t)noexcept{ + return svget3_u8(t, ImmIndex); + } + template + static inline pixels_type load(svbool_t pg, const std::uint8_t* ptr)noexcept{ + if constexpr(Alpha) + return svld4_u8(pg, ptr); + else + return svld3_u8(pg, ptr); + } + template + static inline void encode_sve(Pusher& p_, Puller& pixels_, const desc& desc){ + static constexpr bool Alpha = Channels == 4; + std::uint8_t* p = p_.raw_pointer(); + const std::uint8_t* pixels = pixels_.raw_pointer(); + + rgba_t index[index_size] = {}; + + const auto zero = svdup_n_u8(0); + const auto iota = svindex_u8(0, 1); + + pixels_type prev; + if constexpr(Alpha) + prev = create(zero, zero, zero, svdup_n_u8(255)); + else + prev = create(zero, zero, zero); + + std::size_t run = 0; + rgba_t px = {0, 0, 0, 255}; + auto prev_hash = static_cast(index_size); + + const std::size_t px_len = desc.width * desc.height; + static constexpr auto vector_lanes = SVERegisterSize/8; + for(std::size_t i = 0; i < px_len; i += vector_lanes){ + const auto mask = svwhilelt_b8_u64(i, px_len); + const auto num = std::min(px_len-i, vector_lanes); + const auto pxs = load(mask, pixels); + static constexpr std::uint64_t imm = SVERegisterSize/8-1; + auto rv = svsub_u8_x(mask, get<0>(pxs), svext_u8(get<0>(prev), get<0>(pxs), imm)); + auto gv = svsub_u8_x(mask, get<1>(pxs), svext_u8(get<1>(prev), get<1>(pxs), imm)); + auto bv = svsub_u8_x(mask, get<2>(pxs), svext_u8(get<2>(prev), get<2>(pxs), imm)); + [[maybe_unused]] svbool_t av; + bool alpha = true; + if constexpr(Alpha){ + av = svcmpeq_n_u8(mask, svsub_u8_x(mask, get<3>(pxs), svext_u8(get<3>(prev), get<3>(pxs), imm)), 0); + alpha = !svptest_any(mask, svnot_b_z(mask, av)); + } + auto runv = 
svcmpeq_n_u8(mask, svorr_u8_x(mask, svorr_u8_x(mask, rv, gv), bv), 0); + if constexpr(Alpha) + runv = svand_b_z(mask, runv, av); + const auto not_runv = svnot_b_z(mask, runv); + if(!svptest_any(mask, not_runv)){ + run += num; + pixels += num*Channels; + continue; + } + const auto r = svminv_u8(not_runv, iota); + run += r; + pixels += r*Channels; + if(run > 0){ + while(run >= 62)[[unlikely]]{ + static constexpr std::uint8_t x = chunk_tag::run | 61; + *p++ = x; + run -= 62; + } + if(run > 1){ + *p++ = chunk_tag::run | (run-1); + run = 0; + } + else if(run == 1){ + if(prev_hash == index_size)[[unlikely]] + *p++ = chunk_tag::run; + else + *p++ = chunk_tag::index | prev_hash; + run = 0; + } + } + rv = svadd_n_u8_x(mask, rv, 2); + gv = svadd_n_u8_x(mask, gv, 2); + bv = svadd_n_u8_x(mask, bv, 2); + const auto diffv = svorr_u8_z(svcmplt_n_u8(mask, svorr_u8_z(mask, svorr_u8_x(mask, rv, gv), bv), 4), svorr_n_u8_x(mask, svlsl_n_u8_x(mask, rv, 4), chunk_tag::diff), svorr_u8_x(mask, svlsl_n_u8_x(mask, gv, 2), bv)); + rv = svadd_n_u8_x(mask, svsub_u8_x(mask, rv, gv), 8); + bv = svadd_n_u8_x(mask, svsub_u8_x(mask, bv, gv), 8); + gv = svadd_n_u8_x(mask, gv, 30); + const auto lu = svorr_n_u8_z(svcmpeq_n_u8(mask, svorr_u8_x(mask, svand_n_u8_x(mask, svorr_u8_x(mask, rv, bv), 0xf0), svand_n_u8_x(mask, gv, 0xc0)), 0), gv, chunk_tag::luma); + const auto ma = svorr_u8_x(mask, svlsl_n_u8_x(mask, rv, 4), bv); + svuint8_t hash; + if constexpr(Alpha) + hash = svand_n_u8_x(mask, svadd_u8_x(mask, svadd_u8_x(mask, svmul_n_u8_x(mask, get<0>(pxs), 3), svmul_n_u8_x(mask, get<1>(pxs), 5)), svadd_u8_x(mask, svmul_n_u8_x(mask, get<2>(pxs), 7), svmul_n_u8_x(mask, get<3>(pxs), 11))), 63); + else + hash = svand_n_u8_x(mask, svadd_u8_x(mask, svadd_u8_x(mask, svmul_n_u8_x(mask, get<0>(pxs), 3), svmul_n_u8_x(mask, get<1>(pxs), 5)), svadd_n_u8_x(mask, svmul_n_u8_x(mask, get<2>(pxs), 7), static_cast(255*11))), 63); + std::uint8_t runs[SVERegisterSize/8], diffs[SVERegisterSize/8], lumas[SVERegisterSize/8*2], 
hashs[SVERegisterSize/8]; + [[maybe_unused]] std::uint8_t alphas[SVERegisterSize/8]; + svst1_u8(mask, runs, svadd_n_u8_m(runv, zero, 1)); + svst1_u8(mask, diffs, diffv); + const auto luma = svcreate2_u8(lu, ma); + svst2_u8(mask, lumas, luma); + svst1_u8(mask, hashs, hash); + if constexpr(Alpha) + if(!alpha) + svst1_u8(mask, alphas, svadd_n_u8_m(av, zero, 1)); + for(std::size_t i = r; i < num; ++i){ + if(runs[i]){ + ++run; + pixels += Channels; + continue; + } + if(run > 1){ + *p++ = chunk_tag::run | (run-1); + run = 0; + } + else if(run == 1){ + if(prev_hash == index_size)[[unlikely]] + *p++ = chunk_tag::run; + else + *p++ = chunk_tag::index | prev_hash; + run = 0; + } + const auto index_pos = hashs[i]; + prev_hash = index_pos; + efficient_memcpy(&px, pixels); + pixels += Channels; + if(index[index_pos] == px){ + *p++ = chunk_tag::index | index_pos; + continue; + } + index[index_pos] = px; + + if constexpr(Alpha) + if(!alpha && !alphas[i]){ + *p++ = chunk_tag::rgba; + std::memcpy(p, &px, 4); + p += 4; + continue; + } + if(diffs[i]) + *p++ = diffs[i]; + else if(lumas[i*2]){ + std::memcpy(p, lumas + i*2, 2); + p += 2; + } + else{ + *p++ = chunk_tag::rgb; + efficient_memcpy<3>(p, &px); + p += 3; + } + } + prev = pxs; + } + while(run >= 62)[[unlikely]]{ + static constexpr std::uint8_t x = chunk_tag::run | 61; + *p++ = x; + run -= 62; + } + if(run > 0){ + *p++ = chunk_tag::run | (run-1); + run = 0; + } + p_.advance(p-p_.raw_pointer()); + pixels_.advance(px_len*Channels); + + push(p_, padding); + } +#elif defined(__aarch64__) + template + using pixels_type = std::conditional_t; + template + static inline pixels_type load(const std::uint8_t* ptr)noexcept{ + if constexpr(Alpha) + return vld4q_u8(ptr); + else + return vld3q_u8(ptr); + } + static constexpr std::size_t simd_lanes = 16; + template + static inline void encode_neon(Pusher& p_, Puller& pixels_, const desc& desc){ + static constexpr bool Alpha = Channels == 4; + std::uint8_t* p = p_.raw_pointer(); + const 
std::uint8_t* pixels = pixels_.raw_pointer(); + + rgba_t index[index_size] = {}; + + const auto zero = vdupq_n_u8(0); + static constexpr std::uint8_t iota_[simd_lanes] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + const auto iota = vld1q_u8(iota_); + + pixels_type prev; + prev.val[0] = prev.val[1] = prev.val[2] = zero; + if constexpr(Alpha) + prev.val[3] = vdupq_n_u8(255); + + std::size_t run = 0; + rgba_t px = {0, 0, 0, 255}; + auto prev_hash = static_cast(index_size); + + std::size_t px_len = desc.width * desc.height; + std::size_t simd_len = px_len / simd_lanes; + const std::size_t simd_len_16 = simd_len * simd_lanes; + px_len -= simd_len_16; + pixels_.advance(simd_len_16*Channels); + while(simd_len--){ + const auto pxs = load(pixels); + pixels_type diff; + diff.val[0] = vsubq_u8(pxs.val[0], vextq_u8(prev.val[0], pxs.val[0], simd_lanes-1)); + diff.val[1] = vsubq_u8(pxs.val[1], vextq_u8(prev.val[1], pxs.val[1], simd_lanes-1)); + diff.val[2] = vsubq_u8(pxs.val[2], vextq_u8(prev.val[2], pxs.val[2], simd_lanes-1)); + bool alpha = true; + if constexpr(Alpha){ + diff.val[3] = vsubq_u8(pxs.val[3], vextq_u8(prev.val[3], pxs.val[3], simd_lanes-1)); + diff.val[3] = vceqq_u8(diff.val[3], zero); + alpha = vminvq_u8(diff.val[3]) != 0; + } + auto runv = vceqq_u8(vorrq_u8(vorrq_u8(diff.val[0], diff.val[1]), diff.val[2]), zero); + if(vminvq_u8(runv) != 0 && alpha){ + run += simd_lanes; + pixels += simd_lanes*Channels; + continue; + } + if constexpr(Alpha) + runv = vandq_u8(runv, diff.val[3]); + const auto r = vminvq_u8(vorrq_u8(vandq_u8(vmvnq_u8(runv), iota), runv)); + run += r; + pixels += r*Channels; + if(run > 0){ + while(run >= 62)[[unlikely]]{ + static constexpr std::uint8_t x = chunk_tag::run | 61; + *p++ = x; + run -= 62; + } + if(run > 1){ + *p++ = chunk_tag::run | (run-1); + run = 0; + } + else if(run == 1){ + if(prev_hash == index_size)[[unlikely]] + *p++ = chunk_tag::run; + else + *p++ = chunk_tag::index | prev_hash; + run = 0; + } + } + const auto two 
= vdupq_n_u8(2); + diff.val[0] = vaddq_u8(diff.val[0], two); + diff.val[1] = vaddq_u8(diff.val[1], two); + diff.val[2] = vaddq_u8(diff.val[2], two); + const auto four = vdupq_n_u8(4); + const auto diffv = vandq_u8(vorrq_u8(vorrq_u8(vdupq_n_u8(chunk_tag::diff), vshlq_n_u8(diff.val[0], 4)), vorrq_u8(vshlq_n_u8(diff.val[1], 2), diff.val[2])), vcltq_u8(vorrq_u8(vorrq_u8(diff.val[0], diff.val[1]), diff.val[2]), four)); + const auto eight = vdupq_n_u8(8); + diff.val[0] = vaddq_u8(vsubq_u8(diff.val[0], diff.val[1]), eight); + diff.val[2] = vaddq_u8(vsubq_u8(diff.val[2], diff.val[1]), eight); + diff.val[1] = vaddq_u8(diff.val[1], vdupq_n_u8(30)); + const auto lu = vandq_u8(vorrq_u8(vdupq_n_u8(chunk_tag::luma), diff.val[1]), vceqq_u8(vorrq_u8(vandq_u8(vorrq_u8(diff.val[0], diff.val[2]), vdupq_n_u8(0xf0)), vandq_u8(diff.val[1], vdupq_n_u8(0xc0))), zero)); + const auto ma = vorrq_u8(vshlq_n_u8(diff.val[0], 4), diff.val[2]); + uint8x16_t hash; + if constexpr(Alpha) + hash = vandq_u8(vaddq_u8(vaddq_u8(vmulq_u8(pxs.val[0], vdupq_n_u8(3)), vmulq_u8(pxs.val[1], vdupq_n_u8(5))), vaddq_u8(vmulq_u8(pxs.val[2], vdupq_n_u8(7)), vmulq_u8(pxs.val[3], vdupq_n_u8(11)))), vdupq_n_u8(63)); + else + hash = vandq_u8(vaddq_u8(vaddq_u8(vmulq_u8(pxs.val[0], vdupq_n_u8(3)), vmulq_u8(pxs.val[1], vdupq_n_u8(5))), vaddq_u8(vmulq_u8(pxs.val[2], vdupq_n_u8(7)), vdupq_n_u8(static_cast(255*11)))), vdupq_n_u8(63)); + std::uint8_t runs[simd_lanes], diffs[simd_lanes], lumas[simd_lanes*2], hashs[simd_lanes]; + [[maybe_unused]] std::uint8_t alphas[simd_lanes]; + vst1q_u8(runs, runv); + vst1q_u8(diffs, diffv); + vst2q_u8(lumas, (uint8x16x2_t{lu, ma})); + vst1q_u8(hashs, hash); + if constexpr(Alpha) + if(!alpha) + vst1q_u8(alphas, diff.val[3]); + for(std::size_t i = r; i < simd_lanes; ++i){ + if(runs[i]){ + ++run; + pixels += Channels; + continue; + } + if(run > 1){ + *p++ = chunk_tag::run | (run-1); + run = 0; + } + else if(run == 1){ + if(prev_hash == index_size)[[unlikely]] + *p++ = chunk_tag::run; + else + 
*p++ = chunk_tag::index | prev_hash; + run = 0; + } + const auto index_pos = hashs[i]; + prev_hash = index_pos; + efficient_memcpy(&px, pixels); + pixels += Channels; + if(index[index_pos] == px){ + *p++ = chunk_tag::index | index_pos; + continue; + } + index[index_pos] = px; + + if constexpr(Alpha) + if(!alpha && !alphas[i]){ + *p++ = chunk_tag::rgba; + std::memcpy(p, &px, 4); + p += 4; + continue; + } + if(diffs[i]) + *p++ = diffs[i]; + else if(lumas[i*2]){ + std::memcpy(p, lumas + i*2, 2); + p += 2; + } + else{ + *p++ = chunk_tag::rgb; + efficient_memcpy<3>(p, &px); + p += 3; + } + } + prev = pxs; + } + p_.advance(p-p_.raw_pointer()); + + if constexpr(Alpha) + encode_body(p_, pixels_, index, px_len, px, prev_hash, run); + else{ + rgb_t px_prev; + efficient_memcpy<3>(&px_prev, &px); + encode_body(p_, pixels_, index, px_len, px_prev, prev_hash, run); + } + + push(p_, padding); + } +#elif defined(__AVX2__) + static constexpr unsigned de_bruijn_bit_position_sequence[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + static constexpr unsigned lsb32(std::uint32_t x)noexcept{ + return de_bruijn_bit_position_sequence[(static_cast(x&-static_cast(x))*0x077cb531u) >> 27]; + } + template + static inline __m256i slli_epi8(__m256i v)noexcept{ + const auto mask = _mm256_set1_epi8(static_cast(0xff << M) >> M); + return _mm256_slli_epi16(_mm256_and_si256(v, mask), M); + } + template + static inline __m256i mul_epi8(__m256i v)noexcept{ + if constexpr(M == 0) + return _mm256_setzero_si256(); + else if constexpr(M == 1) + return v; + else if constexpr(M == 2) + return slli_epi8<1>(v); + else if constexpr(M == 3) + return _mm256_add_epi8(slli_epi8<1>(v), v); + else if constexpr(M == 4) + return slli_epi8<2>(v); + else if constexpr(M == 5) + return _mm256_add_epi8(slli_epi8<2>(v), v); + else if constexpr(M == 6) + return _mm256_add_epi8(slli_epi8<2>(v), slli_epi8<1>(v)); + else if constexpr(M == 7) + 
return _mm256_sub_epi8(slli_epi8<3>(v), v); + else if constexpr(M == 8) + return slli_epi8<3>(v); + else if constexpr(M == 9) + return _mm256_add_epi8(slli_epi8<3>(v), v); + else if constexpr(M == 10) + return _mm256_add_epi8(slli_epi8<3>(v), slli_epi8<1>(v)); + else if constexpr(M == 11) + return _mm256_add_epi8(_mm256_add_epi8(slli_epi8<3>(v), slli_epi8<1>(v)), v); + else if constexpr(M == 12) + return _mm256_add_epi8(slli_epi8<3>(v), slli_epi8<2>(v)); + else if constexpr(M == 13) + return _mm256_add_epi8(_mm256_add_epi8(slli_epi8<3>(v), slli_epi8<2>(v)), v); + else if constexpr(M == 14) + return _mm256_sub_epi8(slli_epi8<4>(v), slli_epi8<1>(v)); + else if constexpr(M == 15) + return _mm256_sub_epi8(slli_epi8<4>(v), v); + else + static_assert(M <= 15); + } + static inline __m256i prev_vector(__m256i pxs, __m256i prev)noexcept{ + const auto permute = _mm256_permute2x128_si256(pxs, pxs, 0x08); + const auto inserted = _mm256_inserti128_si256(permute, _mm256_extracti128_si256(prev, 1), 0); + return _mm256_alignr_epi8(pxs, inserted, 15); + } + template + struct pixels_type{ + __m256i val[3+Alpha]; + }; + static constexpr std::size_t simd_lanes = 256/8; + template + static inline pixels_type load(const std::uint8_t* ptr)noexcept{ + if constexpr(Alpha){ + const auto t1 = _mm256_loadu_si256(reinterpret_cast(ptr)); + const auto t2 = _mm256_loadu_si256(reinterpret_cast(ptr+simd_lanes)); + const auto t3 = _mm256_loadu_si256(reinterpret_cast(ptr+simd_lanes*2)); + const auto t4 = _mm256_loadu_si256(reinterpret_cast(ptr+simd_lanes*3)); + const auto lo12 = _mm256_unpacklo_epi8(t1, t2); + const auto lo34 = _mm256_unpacklo_epi8(t3, t4); + const auto lolo12lo34 = _mm256_unpacklo_epi16(lo12, lo34); + const auto hilo12lo34 = _mm256_unpackhi_epi16(lo12, lo34); + const auto lololo12lo34hilo12lo34 = _mm256_unpacklo_epi32(lolo12lo34, hilo12lo34); + const auto hilolo12lo34hilo12lo34 = _mm256_unpackhi_epi32(lolo12lo34, hilo12lo34); + const auto hi12 = _mm256_unpackhi_epi8(t1, t2); + const 
auto hi34 = _mm256_unpackhi_epi8(t3, t4); + const auto lohi12hi34 = _mm256_unpacklo_epi16(hi12, hi34); + const auto hihi12hi34 = _mm256_unpackhi_epi16(hi12, hi34); + const auto lolohi12hi34hihi12hi34 = _mm256_unpacklo_epi32(lohi12hi34, hihi12hi34); + const auto lolololo12lo34hilo12lo34lolohi12hi34hihi12hi34 = _mm256_unpacklo_epi64(lololo12lo34hilo12lo34, lolohi12hi34hihi12hi34); + const auto hilololo12lo34hilo12lo34lolohi12hi34hihi12hi34 = _mm256_unpackhi_epi64(lololo12lo34hilo12lo34, lolohi12hi34hihi12hi34); + const auto hilohi12hi34hihi12hi34 = _mm256_unpackhi_epi32(lohi12hi34, hihi12hi34); + const auto lohilolo12lo34hilo12lo34hilohi12hi34hihi12hi34 = _mm256_unpacklo_epi64(hilolo12lo34hilo12lo34, hilohi12hi34hihi12hi34); + const auto hihilolo12lo34hilo12lo34hilohi12hi34hihi12hi34 = _mm256_unpackhi_epi64(hilolo12lo34hilo12lo34, hilohi12hi34hihi12hi34); + const auto mask1 = _mm256_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15); + const auto mask2 = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7); + const auto r = _mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(lolololo12lo34hilo12lo34lolohi12hi34hihi12hi34, mask1), mask2); + const auto g = _mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(hilololo12lo34hilo12lo34lolohi12hi34hihi12hi34, mask1), mask2); + const auto b = _mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(lohilolo12lo34hilo12lo34hilohi12hi34hihi12hi34, mask1), mask2); + const auto a = _mm256_permutevar8x32_epi32(_mm256_shuffle_epi8(hihilolo12lo34hilo12lo34hilohi12hi34hihi12hi34, mask1), mask2); + return {{r, g, b, a}}; + } + else{ + const auto t1 = _mm_loadu_si128(reinterpret_cast(ptr)); + const auto t2 = _mm_loadu_si128(reinterpret_cast(ptr+simd_lanes/2)); + const auto t3 = _mm_loadu_si128(reinterpret_cast(ptr+simd_lanes)); + const auto t4 = _mm_loadu_si128(reinterpret_cast(ptr+simd_lanes*3/2)); + const auto t5 = _mm_loadu_si128(reinterpret_cast(ptr+simd_lanes*2)); + const auto t6 = 
_mm_loadu_si128(reinterpret_cast(ptr+simd_lanes*5/2)); + const auto mask01 = _mm_setr_epi8(0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13, 2, 5, 8, 11, 14); + const auto mask02 = _mm_setr_epi8(2, 5, 8, 11, 14, 0, 3, 6, 9, 12, 15, 1, 4, 7, 10, 13); + const auto mask03 = _mm_setr_epi8(1, 4, 7, 10, 13, 2, 5, 8, 11, 14, 0, 3, 6, 9, 12, 15); + static constexpr char _128 = static_cast(0b1000'0000); + const auto mask11 = _mm_setr_epi8(0, 0, 0, 0, 0, 0, _128, _128, _128, _128, _128, _128, _128, _128, _128, _128); + const auto mask21 = _mm_setr_epi8(_128, _128, _128, _128, _128, _128, _128, _128, _128, _128, _128, 0, 0, 0, 0, 0); + const auto mask12 = _mm_setr_epi8(_128, _128, _128, _128, _128, 0, 0, 0, 0, 0, 0, _128, _128, _128, _128, _128); + const auto mask22 = _mm_setr_epi8(0, 0, 0, 0, 0, _128, _128, _128, _128, _128, _128, _128, _128, _128, _128, _128); + const auto mask13 = _mm_setr_epi8(_128, _128, _128, _128, _128, _128, _128, _128, _128, _128, 0, 0, 0, 0, 0, 0); + const auto mask23 = _mm_setr_epi8(_128, _128, _128, _128, _128, 0, 0, 0, 0, 0, _128, _128, _128, _128, _128, _128); + const auto x1 = _mm_shuffle_epi8(t1, mask01); + const auto x2 = _mm_shuffle_epi8(t2, mask02); + const auto x3 = _mm_shuffle_epi8(t3, mask03); + const auto x4 = _mm_shuffle_epi8(t4, mask01); + const auto x5 = _mm_shuffle_epi8(t5, mask02); + const auto x6 = _mm_shuffle_epi8(t6, mask03); + const auto r1 = _mm_blendv_epi8(_mm_alignr_epi8(x3, x3, 5), _mm_blendv_epi8(x1, _mm_alignr_epi8(x2, x2, 10), mask11), mask21); + const auto g1 = _mm_blendv_epi8(_mm_alignr_epi8(x1, x1, 6), _mm_blendv_epi8(x2, _mm_alignr_epi8(x3, x3, 10), mask12), mask22); + const auto b1 = _mm_blendv_epi8(_mm_alignr_epi8(x2, x2, 6), _mm_blendv_epi8(x3, _mm_alignr_epi8(x1, x1, 11), mask13), mask23); + const auto r2 = _mm_blendv_epi8(_mm_alignr_epi8(x6, x6, 5), _mm_blendv_epi8(x4, _mm_alignr_epi8(x5, x5, 10), mask11), mask21); + const auto g2 = _mm_blendv_epi8(_mm_alignr_epi8(x4, x4, 6), _mm_blendv_epi8(x5, _mm_alignr_epi8(x6, x6, 10), 
mask12), mask22); + const auto b2 = _mm_blendv_epi8(_mm_alignr_epi8(x5, x5, 6), _mm_blendv_epi8(x6, _mm_alignr_epi8(x4, x4, 11), mask13), mask23); + const auto r = _mm256_inserti128_si256(_mm256_castsi128_si256(r1), r2, 1); + const auto g = _mm256_inserti128_si256(_mm256_castsi128_si256(g1), g2, 1); + const auto b = _mm256_inserti128_si256(_mm256_castsi128_si256(b1), b2, 1); + return {{r, g, b}}; + } + } + template + static inline void encode_avx2(Pusher& p_, Puller& pixels_, const desc& desc){ + static constexpr bool Alpha = Channels == 4; + std::uint8_t* p = p_.raw_pointer(); + const std::uint8_t* pixels = pixels_.raw_pointer(); + + rgba_t index[index_size] = {}; + + const auto zero = _mm256_setzero_si256(); + + pixels_type prev; + prev.val[0] = prev.val[1] = prev.val[2] = zero; + if constexpr(Alpha) + prev.val[3] = _mm256_set1_epi8(static_cast(0xff)); + + std::size_t run = 0; + rgba_t px = {0, 0, 0, 255}; + auto prev_hash = static_cast(index_size); + + std::size_t px_len = desc.width * desc.height; + std::size_t simd_len = px_len / simd_lanes; + const std::size_t simd_len_32 = simd_len * simd_lanes; + px_len -= simd_len_32; + pixels_.advance(simd_len_32*Channels); + while(simd_len--){ + const auto pxs = load(pixels); + pixels_type diff; + diff.val[0] = _mm256_sub_epi8(pxs.val[0], prev_vector(pxs.val[0], prev.val[0])); + diff.val[1] = _mm256_sub_epi8(pxs.val[1], prev_vector(pxs.val[1], prev.val[1])); + diff.val[2] = _mm256_sub_epi8(pxs.val[2], prev_vector(pxs.val[2], prev.val[2])); + bool alpha = true; + if constexpr(Alpha){ + diff.val[3] = _mm256_sub_epi8(pxs.val[3], prev_vector(pxs.val[3], prev.val[3])); + alpha = _mm256_testz_si256(diff.val[3], diff.val[3]); + diff.val[3] = _mm256_cmpeq_epi8(diff.val[3], zero); + } + const auto ored = _mm256_or_si256(_mm256_or_si256(diff.val[0], diff.val[1]), diff.val[2]); + auto runv = _mm256_cmpeq_epi8(ored, zero); + if(_mm256_testz_si256(ored, ored) && alpha){ + run += simd_lanes; + pixels += simd_lanes*Channels; + 
continue; + } + if constexpr(Alpha) + runv = _mm256_and_si256(runv, diff.val[3]); + const auto r = lsb32(~_mm256_movemask_epi8(runv)); + run += r; + pixels += r*Channels; + if(run > 0){ + while(run >= 62)[[unlikely]]{ + static constexpr std::uint8_t x = chunk_tag::run | 61; + *p++ = x; + run -= 62; + } + if(run > 1){ + *p++ = static_cast(chunk_tag::run | (run-1)); + run = 0; + } + else if(run == 1){ + if(prev_hash == index_size)[[unlikely]] + *p++ = chunk_tag::run; + else + *p++ = chunk_tag::index | prev_hash; + run = 0; + } + } + const auto two = _mm256_set1_epi8(2); + diff.val[0] = _mm256_add_epi8(diff.val[0], two); + diff.val[1] = _mm256_add_epi8(diff.val[1], two); + diff.val[2] = _mm256_add_epi8(diff.val[2], two); + const auto diffor = _mm256_or_si256(_mm256_or_si256(diff.val[0], diff.val[1]), diff.val[2]); + const auto diffv = _mm256_and_si256(_mm256_or_si256(_mm256_or_si256(_mm256_set1_epi8(chunk_tag::diff), slli_epi8<4>(diff.val[0])), _mm256_or_si256(slli_epi8<2>(diff.val[1]), diff.val[2])), _mm256_cmpeq_epi8(_mm256_and_si256(diffor, _mm256_set1_epi8(0b11)), diffor)); + const auto eight = _mm256_set1_epi8(8); + diff.val[0] = _mm256_add_epi8(_mm256_sub_epi8(diff.val[0], diff.val[1]), eight); + diff.val[2] = _mm256_add_epi8(_mm256_sub_epi8(diff.val[2], diff.val[1]), eight); + diff.val[1] = _mm256_add_epi8(diff.val[1], _mm256_set1_epi8(30)); + const auto luma_mask = _mm256_setr_epi32(0, 1, 4, 5, 2, 3, 6, 7); + const auto lu = _mm256_permutevar8x32_epi32(_mm256_and_si256(_mm256_or_si256(_mm256_set1_epi8(static_cast(chunk_tag::luma)), diff.val[1]), _mm256_cmpeq_epi8(_mm256_or_si256(_mm256_and_si256(_mm256_or_si256(diff.val[0], diff.val[2]), _mm256_set1_epi8(static_cast(0xf0))), _mm256_and_si256(diff.val[1], _mm256_set1_epi8(static_cast(0xc0)))), zero)), luma_mask); + const auto ma = _mm256_permutevar8x32_epi32(_mm256_or_si256(slli_epi8<4>(diff.val[0]), diff.val[2]), luma_mask); + __m256i hash; + if constexpr(Alpha) + hash = 
_mm256_and_si256(_mm256_add_epi8(_mm256_add_epi8(mul_epi8<3>(pxs.val[0]), mul_epi8<5>(pxs.val[1])), _mm256_add_epi8(mul_epi8<7>(pxs.val[2]), mul_epi8<11>(pxs.val[3]))), _mm256_set1_epi8(63)); + else + hash = _mm256_and_si256(_mm256_add_epi8(_mm256_add_epi8(mul_epi8<3>(pxs.val[0]), mul_epi8<5>(pxs.val[1])), _mm256_add_epi8(mul_epi8<7>(pxs.val[2]), _mm256_set1_epi8(static_cast(255*11)))), _mm256_set1_epi8(63)); + alignas(alignof(__m256i)) std::uint8_t runs[simd_lanes], diffs[simd_lanes], lumas[simd_lanes*2], hashs[simd_lanes]; + [[maybe_unused]] alignas(alignof(__m256i)) std::uint8_t alphas[simd_lanes]; + _mm256_store_si256(reinterpret_cast<__m256i*>(runs), runv); + _mm256_store_si256(reinterpret_cast<__m256i*>(diffs), diffv); + _mm256_store_si256(reinterpret_cast<__m256i*>(lumas), _mm256_unpacklo_epi8(lu, ma)); + _mm256_store_si256(reinterpret_cast<__m256i*>(lumas)+1, _mm256_unpackhi_epi8(lu, ma)); + _mm256_store_si256(reinterpret_cast<__m256i*>(hashs), hash); + if constexpr(Alpha) + if(!alpha) + _mm256_store_si256(reinterpret_cast<__m256i*>(alphas), diff.val[3]); + for(std::size_t i = r; i < simd_lanes; ++i){ + if(runs[i]){ + ++run; + pixels += Channels; + continue; + } + if(run > 1){ + *p++ = static_cast(chunk_tag::run | (run-1)); + run = 0; + } + else if(run == 1){ + if(prev_hash == index_size)[[unlikely]] + *p++ = chunk_tag::run; + else + *p++ = chunk_tag::index | prev_hash; + run = 0; + } + const auto index_pos = hashs[i]; + prev_hash = index_pos; + efficient_memcpy(&px, pixels); + pixels += Channels; + if(index[index_pos] == px){ + *p++ = chunk_tag::index | index_pos; + continue; + } + index[index_pos] = px; + + if constexpr(Alpha) + if(!alpha && !alphas[i]){ + *p++ = chunk_tag::rgba; + std::memcpy(p, &px, 4); + p += 4; + continue; + } + if(diffs[i]) + *p++ = diffs[i]; + else if(lumas[i*2]){ + std::memcpy(p, lumas + i*2, 2); + p += 2; + } + else{ + *p++ = chunk_tag::rgb; + efficient_memcpy<3>(p, &px); + p += 3; + } + } + prev = pxs; + } + 
p_.advance(p-p_.raw_pointer()); + + if constexpr(Alpha) + encode_body(p_, pixels_, index, px_len, px, prev_hash, run); + else{ + rgb_t px_prev; + efficient_memcpy<3>(&px_prev, &px); + encode_body(p_, pixels_, index, px_len, px_prev, prev_hash, run); + } + + push(p_, padding); + } +#endif +#endif + + template + static inline void encode_impl(Pusher& p, Puller& pixels, const desc& desc){ + rgba_t index[index_size] = {}; + + std::size_t px_len = desc.width * desc.height; + encode_body(p, pixels, index, px_len); + + push(p, padding); + } + + template + static inline desc decode_header(Puller& p){ + desc d; + const auto magic_ = read_32(p); + d.width = read_32(p); + d.height = read_32(p); + d.channels = p.pull(); + d.colorspace = static_cast(p.pull()); + if( + d.width == 0 || d.height == 0 || magic_ != magic || + d.channels < 3 || d.channels > 4 || + d.height >= pixels_max / d.width + )[[unlikely]] + throw std::runtime_error("qoixx::qoi::decode: invalid header"); + return d; + } + +#ifndef QOIXX_DECODE_WITH_TABLES +#define QOIXX_HPP_DECODE_WITH_TABLES_NOT_DEFINED +#ifdef __aarch64__ +#define QOIXX_DECODE_WITH_TABLES 0 +#else +#define QOIXX_DECODE_WITH_TABLES 1 +#endif +#endif + +#if QOIXX_DECODE_WITH_TABLES + static constexpr std::size_t hash_table_offset = std::numeric_limits::max()+1 - chunk_tag::diff; + static constexpr std::array::max()+1+chunk_tag::run-chunk_tag::diff> create_hash_diff_table(){ + std::array::max()+1+chunk_tag::run-chunk_tag::diff> table = {}; + for(std::size_t i = 0; i <= std::numeric_limits::max(); ++i){ + constexpr std::uint32_t mask_tail_4 = 0b0000'1111u; + const auto vr = (i >> 4); + const auto vb = (i & mask_tail_4); + table[i] = (vr*3 + vb*7) % index_size; + } + for(std::size_t i = chunk_tag::diff; i < chunk_tag::luma; ++i){ + constexpr std::uint32_t mask_tail_2 = 0b0000'0011u; + const auto vr = static_cast((i >> 4) & mask_tail_2) - 2; + const auto vg = static_cast((i >> 2) & mask_tail_2) - 2; + const auto vb = static_cast( i & mask_tail_2) - 
2; + table[i+hash_table_offset] = static_cast((vr*3 + vg*5 + vb*7) % index_size); + } + for(std::size_t i = chunk_tag::luma; i < chunk_tag::run; ++i){ + constexpr int vgv = chunk_tag::luma+40; + const int vg = i - vgv; + table[i+hash_table_offset] = static_cast((vg*3 + (vg+8)*5 + vg*7) % index_size); + } + return table; + } + static constexpr std::array, std::numeric_limits::max()+1> create_luma_table(){ + std::array, std::numeric_limits::max()+1> table = {}; + for(std::size_t i = 0; i <= std::numeric_limits::max(); ++i){ + constexpr std::uint32_t mask_tail_4 = 0b0000'1111u; + const auto vr = (i >> 4); + const auto vb = (i & mask_tail_4); + table[i][0] = static_cast(vr); + table[i][1] = static_cast(vb); + } + return table; + } + static constexpr std::array, chunk_tag::luma> create_diff_table(){ + std::array, chunk_tag::luma> table = {}; + for(std::size_t i = chunk_tag::diff; i < chunk_tag::luma; ++i){ + constexpr std::uint32_t mask_tail_2 = 0b0000'0011u; + const auto vr = ((i >> 4) & mask_tail_2) - 2; + const auto vg = ((i >> 2) & mask_tail_2) - 2; + const auto vb = ( i & mask_tail_2) - 2; + table[i][0] = static_cast(vr); + table[i][1] = static_cast(vg); + table[i][2] = static_cast(vb); + } + return table; + } +#endif + + template + static inline void decode_impl(Pusher& pixels, Puller& p, std::size_t px_len, std::size_t size){ +#ifndef __aarch64__ + using rgba_t = std::conditional_t; +#endif + rgba_t px = {}; + if constexpr(std::is_same::value) + px.a = 255; + rgba_t index[index_size]; + if constexpr(std::is_same::value){ + index[(0*3+0*5+0*7+0*11)%index_size] = {}; + index[(0*3+0*5+0*7+255*11)%index_size] = px; + } + else + index[(0*3+0*5+0*7+255*11)%index_size] = {}; + +#if QOIXX_DECODE_WITH_TABLES +#define QOIXX_HPP_WITH_TABLES(...) __VA_ARGS__ +#define QOIXX_HPP_WITHOUT_TABLES(...) +#else +#define QOIXX_HPP_WITH_TABLES(...) +#define QOIXX_HPP_WITHOUT_TABLES(...) 
__VA_ARGS__ +#endif + + QOIXX_HPP_WITH_TABLES( + auto hash = px.hash() % index_size; + static constexpr auto luma_hash_diff_table = create_hash_diff_table(); + static constexpr auto hash_diff_table = luma_hash_diff_table.data() + hash_table_offset; + ) + + const auto f = [&pixels, &p, &px_len, &size, &px, &index QOIXX_HPP_WITH_TABLES(, &hash)]{ + const auto b1 = p.pull(); + --size; + +#if defined(__aarch64__) and not defined(QOIXX_NO_SIMD) +#define QOIXX_HPP_DECODE_RUN(px, run) { \ + if constexpr(Pusher::is_contiguous){ \ + ++run; \ + if(run >= 8){ \ + std::conditional_t data = {vdup_n_u8(px.r), vdup_n_u8(px.g), vdup_n_u8(px.b)}; \ + if constexpr(Channels == 4) \ + data.val[3] = vdup_n_u8(px.a); \ + while(run>=8){ \ + if constexpr(Channels == 4) \ + vst4_u8(pixels.raw_pointer(), data); \ + else \ + vst3_u8(pixels.raw_pointer(), data); \ + pixels.advance(Channels*8); \ + run -= 8; \ + } \ + } \ + while(run--){push(pixels, &px);} \ + } \ + else \ + do{push(pixels, &px);}while(run--); \ + } +#else +#define QOIXX_HPP_DECODE_RUN(px, run) do{push(pixels, &px);}while(run--); +#endif + + if(b1 >= chunk_tag::run){ + if(b1 < chunk_tag::rgb){ + /*run*/ + static constexpr std::uint32_t mask_tail_6 = 0b0011'1111u; + std::size_t run = b1 & mask_tail_6; + if(run >= px_len)[[unlikely]] + run = px_len; + px_len -= run; + QOIXX_HPP_DECODE_RUN(px, run) + return; + } + if(b1 == chunk_tag::rgb){ + pull<3>(&px, p); + size -= 3; + QOIXX_HPP_WITH_TABLES(hash = px.hash() % index_size;) + } + if constexpr(Channels == 4){ + if(b1 == chunk_tag::rgba){ + pull<4>(&px, p); + size -= 4; + QOIXX_HPP_WITH_TABLES(hash = px.hash() % index_size;) + } + } + else{ + if(b1 == chunk_tag::rgba)[[unlikely]]{ + pull<3>(&px, p); + p.advance(1); + size -= 4; + QOIXX_HPP_WITH_TABLES(hash = px.hash() % index_size;) + } + } + } + else if(b1 < chunk_tag::diff){ + /*index*/ + if constexpr(std::is_same::value) + px = index[b1]; + else + efficient_memcpy(&px, index + b1); + push(pixels, &px); + 
QOIXX_HPP_WITH_TABLES(hash = b1;) + return; + } + else if(b1 >= chunk_tag::luma){ + /*luma*/ + const auto b2 = p.pull(); + --size; + QOIXX_HPP_WITH_TABLES( + static constexpr auto table = create_luma_table(); + const auto drb = table[b2]; + ) + static constexpr int vgv = chunk_tag::luma+40; + const int vg = b1 - vgv; + QOIXX_HPP_WITH_TABLES( + px.r += vg + drb[0]; + px.g += vg + 8; + px.b += vg + drb[1]; + hash = (static_cast(hash)+hash_diff_table[b1]+luma_hash_diff_table[b2]) % index_size; + ) QOIXX_HPP_WITHOUT_TABLES( + static constexpr std::uint32_t mask_tail_4 = 0b0000'1111u; + px.r += vg + (b2 >> 4); + px.g += vg + 8; + px.b += vg + (b2 & mask_tail_4); + ) + } + else{ + /*diff*/ + QOIXX_HPP_WITH_TABLES( + static constexpr auto table = create_diff_table(); + const auto drgb = table[b1]; + px.r += drgb[0]; + px.g += drgb[1]; + px.b += drgb[2]; + hash = (static_cast(hash)+hash_diff_table[b1]) % index_size; + ) QOIXX_HPP_WITHOUT_TABLES( + static constexpr std::uint32_t mask_tail_2 = 0b0000'0011u; + px.r += ((b1 >> 4) & mask_tail_2) - 2; + px.g += ((b1 >> 2) & mask_tail_2) - 2; + px.b += ( b1 & mask_tail_2) - 2; + ) + } +#undef QOIXX_HPP_DECODE_RUN + if constexpr(std::is_same::value) + index[QOIXX_HPP_WITH_TABLES(hash) QOIXX_HPP_WITHOUT_TABLES(px.hash() % index_size)] = px; + else + efficient_memcpy(index + QOIXX_HPP_WITH_TABLES(hash) QOIXX_HPP_WITHOUT_TABLES(px.hash() % index_size), &px); +#undef QOIXX_HPP_WITHOUT_TABLES +#undef QOIXX_HPP_WITH_TABLES +#ifdef QOIXX_HPP_DECODE_WITH_TABLES_NOT_DEFINED +#undef QOIXX_DECODE_WITH_TABLES +#undef QOIXX_HPP_DECODE_WITH_TABLES_NOT_DEFINED +#endif + + push(pixels, &px); + }; + + while(px_len--)[[likely]]{ + f(); + if(size < sizeof(padding))[[unlikely]]{ + throw std::runtime_error("qoixx::qoi::decode: insufficient input data"); + } + } + } + public: + template + static inline T encode(const U& u, const desc& desc){ + using coU = container_operator; + if(!coU::valid(u) || coU::size(u) < desc.width*desc.height*desc.channels || 
desc.width == 0 || desc.height == 0 || desc.channels < 3 || desc.channels > 4 || desc.height >= pixels_max / desc.width)[[unlikely]] + throw std::invalid_argument{"qoixx::qoi::encode: invalid argument"}; + + const auto max_size = static_cast(desc.width) * desc.height * (desc.channels + 1) + header_size + sizeof(padding); + using coT = container_operator; + T data = coT::construct(max_size); + auto p = coT::create_pusher(data); + auto puller = coU::create_puller(u); + + write_32(p, magic); + write_32(p, desc.width); + write_32(p, desc.height); + p.push(desc.channels); + p.push(static_cast(desc.colorspace)); + +#ifndef QOIXX_NO_SIMD +#if defined(__ARM_FEATURE_SVE) + if constexpr(coT::pusher::is_contiguous && coU::puller::is_contiguous) + if(desc.channels == 4) +#define QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH \ + switch(svcntb()){ \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(128); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(256); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(384); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(512); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(640); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(768); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(896); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1024); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1152); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1280); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1408); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1536); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1664); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1792); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(1920); \ + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(2048); \ + default: while(true){/*unreachable*/} \ + } +#define QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(i) case i/8: encode_sve(p, puller, desc); break + QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH +#undef QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE + else +#define QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE(i) case i/8: encode_sve(p, puller, desc); break; + 
QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH +#undef QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH_CASE +#undef QOIXX_HPP_SVE_REGISTER_SIZE_SWITCH + else +#elif defined(__aarch64__) + if constexpr(coT::pusher::is_contiguous && coU::puller::is_contiguous) + if(desc.channels == 4) + encode_neon<4>(p, puller, desc); + else + encode_neon<3>(p, puller, desc); + else +#elif defined(__AVX2__) + if constexpr(coT::pusher::is_contiguous && coU::puller::is_contiguous) + if(desc.channels == 4) + encode_avx2<4>(p, puller, desc); + else + encode_avx2<3>(p, puller, desc); + else +#endif +#endif + if(desc.channels == 4) + encode_impl<4>(p, puller, desc); + else + encode_impl<3>(p, puller, desc); + + return p.finalize(); + } + template + requires(sizeof(U) == 1) + static inline T encode(const U* pixels, std::size_t size, const desc& desc){ + return encode(std::make_pair(pixels, size), desc); + } + template + requires (!std::is_pointer_v) + static inline std::pair decode(const U& u, std::uint8_t channels = 0){ + using coU = container_operator; + const auto size = coU::size(u); + if(!coU::valid(u) || size < header_size + sizeof(padding) || (channels != 0 && channels != 3 && channels != 4))[[unlikely]] + throw std::invalid_argument{"qoixx::qoi::decode: invalid argument"}; + auto puller = coU::create_puller(u); + + const auto d = decode_header(puller); + if(channels == 0) + channels = d.channels; + + const std::size_t px_len = static_cast(d.width) * d.height; + using coT = container_operator; + T data = coT::construct(px_len*channels); + auto p = coT::create_pusher(data); + + if(channels == 4) + decode_impl<4>(p, puller, px_len, size); + else + decode_impl<3>(p, puller, px_len, size); + + return std::make_pair(std::move(p.finalize()), d); + } + template + requires(sizeof(U) == 1) + static inline std::pair decode(const U* pixels, std::size_t size, std::uint8_t channels = 0){ + return decode(std::make_pair(pixels, size), channels); + } +}; + +} + +#endif //QOIXX_HPP_INCLUDED_ diff --git 
a/Source/visualiser/OutputFragmentShader.glsl b/Source/visualiser/OutputFragmentShader.glsl index 75ac84b..52debae 100644 --- a/Source/visualiser/OutputFragmentShader.glsl +++ b/Source/visualiser/OutputFragmentShader.glsl @@ -24,13 +24,14 @@ float noise(in vec2 uv, in float time) { } void main() { + float glow = uGlow / (2.0 * max(0.0001,sqrt(uExposure))); vec4 line = texture2D(uTexture0, vTexCoordCanvas); // r components have grid; g components do not. vec4 screen = texture2D(uTexture3, vTexCoord); vec4 tightGlow = texture2D(uTexture1, vTexCoord); - vec4 scatter = texture2D(uTexture2, vTexCoord)+0.35; - float light = line.r + uGlow * 1.5 * screen.g * screen.g * tightGlow.r; - light += uGlow * 0.4 * scatter.g * (2.0 + 1.0 * screen.g + 0.5 * screen.r); + vec4 scatter = texture2D(uTexture2, vTexCoord)+glow; + float light = line.r * 1.2 * screen.r + 1.5 * screen.r * screen.g * tightGlow.r; + light += glow * 0.4 * scatter.g * (2.0 + 1.0 * screen.r + 0.5 * screen.r); float tlight = 1.0-pow(2.0, -uExposure*light); float tlight2 = tlight * tlight * tlight; gl_FragColor.rgb = mix(uColour, vec3(1.0), 0.3+tlight2*tlight2*0.5)*tlight; diff --git a/Source/visualiser/VisualiserComponent.cpp b/Source/visualiser/VisualiserComponent.cpp index 17e6f88..8920036 100644 --- a/Source/visualiser/VisualiserComponent.cpp +++ b/Source/visualiser/VisualiserComponent.cpp @@ -14,14 +14,17 @@ VisualiserComponent::VisualiserComponent(AudioBackgroundThreadManager& threadManager, VisualiserSettings& settings, VisualiserComponent* parent, bool visualiserOnly) : settings(settings), threadManager(threadManager), visualiserOnly(visualiserOnly), AudioBackgroundThread("VisualiserComponent", threadManager), parent(parent) { addAndMakeVisible(record); - record.setPulseAnimation(true); + //record.setPulseAnimation(true); record.onClick = [this] { toggleRecording(); - stopwatch.stop(); - stopwatch.reset(); + //stopwatch.stop(); + //stopwatch.reset(); + /* if (record.getToggleState()) { stopwatch.start(); } 
+ */ + record.setToggleState(false, juce::NotificationType::dontSendNotification); resized(); }; @@ -134,7 +137,11 @@ bool VisualiserComponent::keyPressed(const juce::KeyPress& key) { void VisualiserComponent::setFullScreen(bool fullScreen) {} void VisualiserComponent::toggleRecording() { - + chooser = std::make_unique("Choose a .wav file to render...", juce::File(), "*.wav;*.flac"); + auto chooserFlags = juce::FileBrowserComponent::openMode | juce::FileBrowserComponent::canSelectFiles; + chooser->launchAsync(chooserFlags, [this](const juce::FileChooser& fc) { + audioFile = fc.getResult(); + }); } void VisualiserComponent::haltRecording() { @@ -244,6 +251,7 @@ void VisualiserComponent::openGLContextClosing() { glDeleteTextures(1, &blur2Texture.id); glDeleteTextures(1, &blur3Texture.id); glDeleteTextures(1, &blur4Texture.id); + glDeleteTextures(1, &renderTexture.id); screenOpenGLTexture.release(); simpleShader.reset(); @@ -279,26 +287,20 @@ void VisualiserComponent::renderOpenGL() { setupArrays(RESAMPLE_RATIO * sampleRate / FRAME_RATE); } time += 0.01f; - juce::OpenGLHelpers::clear(juce::Colours::black); + intensity = settings.getIntensity() * (41000.0f / sampleRate); if (active) { juce::CriticalSection::ScopedLockType lock(samplesLock); - - if (graticuleEnabled != settings.getGraticuleEnabled() || smudgesEnabled != settings.getSmudgesEnabled()) { - graticuleEnabled = settings.getGraticuleEnabled(); - smudgesEnabled = settings.getSmudgesEnabled(); - screenTexture = createScreenTexture(); + + if (audioFile != juce::File{}) { + renderAudioFile(audioFile, FILE_RENDER_QOI); + audioFile = juce::File{}; } - renderScale = (float) openGLContext.getRenderingScale(); - if (settings.parameters.upsamplingEnabled->getBoolValue()) { - drawLineTexture(smoothedXSamples, smoothedYSamples, smoothedZSamples); + renderScope(smoothedXSamples, smoothedYSamples, smoothedZSamples); } else { - drawLineTexture(xSamples, ySamples, zSamples); + renderScope(xSamples, ySamples, zSamples); } - 
checkGLErrors("drawLineTexture"); - drawCRT(); - checkGLErrors("drawCRT"); } } } @@ -373,10 +375,12 @@ void VisualiserComponent::setupTextures() { blur2Texture = makeTexture(256, 256); blur3Texture = makeTexture(32, 32); blur4Texture = makeTexture(32, 32); + renderTexture = makeTexture(1024, 1024); screenTexture = createScreenTexture(); glBindFramebuffer(GL_FRAMEBUFFER, 0); // Unbind + } Texture VisualiserComponent::makeTexture(int width, int height) { @@ -398,45 +402,48 @@ Texture VisualiserComponent::makeTexture(int width, int height) { return { textureID, width, height }; } -void VisualiserComponent::drawLineTexture(const std::vector& xPoints, const std::vector& yPoints, const std::vector& zPoints) { +void VisualiserComponent::drawLineTexture(const std::vector& xP, const std::vector& yP, const std::vector& zP) { using namespace juce::gl; fadeAmount = juce::jmin(1.0, std::pow(0.5, settings.getPersistence()) * 0.4); activateTargetTexture(lineTexture); fade(); - drawLine(xPoints, yPoints, zPoints); + drawLine(xP, yP, zP); glBindTexture(GL_TEXTURE_2D, targetTexture.value().id); } -void VisualiserComponent::saveTextureToFile(GLuint textureID, int width, int height, const juce::File& file) { +void VisualiserComponent::saveTextureToPNG(Texture texture, const juce::File& file) { using namespace juce::gl; + GLuint textureID = texture.id; + int width = texture.width; + int height = texture.height; // Bind the texture to read its data glBindTexture(GL_TEXTURE_2D, textureID); // Create a vector to store the pixel data (RGBA) - std::vector pixels(width * height * 4); + std::vector pixels(width * height * 8); // Read the pixels from the texture glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, pixels.data()); // Convert raw pixel data to JUCE Image - juce::Image image(juce::Image::PixelFormat::ARGB, width, height, true); // Create a JUCE image + juce::Image* image = new juce::Image (juce::Image::PixelFormat::ARGB, width, height, true); // Create a JUCE image // Lock 
the image to get access to its pixel data - juce::Image::BitmapData bitmapData(image, juce::Image::BitmapData::writeOnly); + juce::Image::BitmapData bitmapData(*image, juce::Image::BitmapData::writeOnly); // Copy the pixel data to the JUCE image (and swap R and B channels) for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { int srcIndex = (y * width + x) * 4; // RGBA format - juce::uint8 r = pixels[srcIndex]; // Red - juce::uint8 g = pixels[srcIndex + 1]; // Green - juce::uint8 b = pixels[srcIndex + 2]; // Blue - juce::uint8 a = pixels[srcIndex + 3]; // Alpha + juce::uint8 r = (pixels)[srcIndex]; // Red + juce::uint8 g = (pixels)[srcIndex + 1]; // Green + juce::uint8 b = (pixels)[srcIndex + 2]; // Blue + juce::uint8 a = (pixels)[srcIndex + 3]; // Alpha - // JUCE stores colors in ARGB, so we need to adjust the channel order - bitmapData.setPixelColour(x, y, juce::Colour(a, r, g, b)); + // This method uses colors in RGBA + bitmapData.setPixelColour(x, height-y-1, juce::Colour(r, g, b, a)); } } @@ -448,11 +455,43 @@ void VisualiserComponent::saveTextureToFile(GLuint textureID, int width, int hei std::unique_ptr outputStream(file.createOutputStream()); if (outputStream != nullptr) { outputStream->setPosition(0); - pngFormat.writeImageToStream(image, *outputStream); + pngFormat.writeImageToStream(*image, *outputStream); outputStream->flush(); } + delete image; }
+/**
+ * Reads back the RGBA contents of a texture and writes them to a file as a QOI image.
+ *
+ * The image description is copied from imageFormat with width and height replaced
+ * by the texture's own dimensions, so the header always matches the data read.
+ * The pixels member is reused as the read-back buffer; it is only ever grown.
+ *
+ * @param texture texture to read back (its id, width and height are used)
+ * @param file    destination, written with juce::File::replaceWithData
+ */
+void VisualiserComponent::saveTextureToQOI(Texture texture, const juce::File& file) {
+    using namespace juce::gl;
+
+    // glGetTexImage below is asked for GL_RGBA / GL_UNSIGNED_BYTE: 4 bytes per texel.
+    const std::size_t byteCount = static_cast<std::size_t>(texture.width) * texture.height * 4;
+    if (pixels.size() < byteCount) pixels.resize(byteCount);
+
+    // Bind the texture to read its data
+    glBindTexture(GL_TEXTURE_2D, texture.id);
+
+    // Read the pixels from the texture
+    glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, pixels.data());
+
+    // Describe the texture that was actually read instead of assuming 1024x1024.
+    qoixx::qoi::desc frameFormat = imageFormat;
+    frameFormat.width = static_cast<std::uint32_t>(texture.width);
+    frameFormat.height = static_cast<std::uint32_t>(texture.height);
+
+    const auto binaryData = qoixx::qoi::encode<std::vector<unsigned char>>(pixels, frameFormat);
+    file.replaceWithData(binaryData.data(), binaryData.size());
+}
 void VisualiserComponent::activateTargetTexture(std::optional texture) { using namespace juce::gl; @@ -504,7 +529,7 @@ void VisualiserComponent::drawTexture(std::optional texture0, std::opti glBufferData(GL_ARRAY_BUFFER, sizeof(float) * fullScreenQuad.size(), fullScreenQuad.data(), GL_STATIC_DRAW); glVertexAttribPointer(glGetAttribLocation(currentShader->getProgramID(), "aPos"), 2, GL_FLOAT, GL_FALSE, 0, 0); glBindBuffer(GL_ARRAY_BUFFER, 0); - + glDrawArrays(GL_TRIANGLES, 0, 6); glDisableVertexAttribArray(glGetAttribLocation(currentShader->getProgramID(), "aPos")); @@ -525,18 +550,21 @@ void VisualiserComponent::setNormalBlending() { glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); } -void VisualiserComponent::drawLine(const std::vector& xPoints, const std::vector& yPoints, const std::vector& zPoints) { +void VisualiserComponent::drawLine(const std::vector& xP, const std::vector& yP, const std::vector& zP) { using namespace juce::gl; setAdditiveBlending(); - int nPoints = xPoints.size(); + int nPoints = xP.size(); + + // Without this, there's an access violation that seems to occur only on some systems + if (scratchVertices.size() != nPoints * 12) scratchVertices.resize(nPoints * 12); for (int i = 0; i < nPoints; ++i) { int p = i * 12; - scratchVertices[p] = scratchVertices[p + 3] = scratchVertices[p + 6] = scratchVertices[p + 9] = xPoints[i]; - scratchVertices[p + 1] = scratchVertices[p + 4] = scratchVertices[p + 7] = scratchVertices[p + 10] = yPoints[i]; - scratchVertices[p + 2] = scratchVertices[p + 5] = scratchVertices[p + 8] = scratchVertices[p + 11] = zPoints[i]; + scratchVertices[p] = scratchVertices[p + 3] = scratchVertices[p + 6] = scratchVertices[p + 9] = xP[i]; + scratchVertices[p + 1] = scratchVertices[p + 4] = scratchVertices[p + 7] = scratchVertices[p + 10] = yP[i]; + scratchVertices[p + 2] = scratchVertices[p + 5] = scratchVertices[p + 8] = scratchVertices[p + 11] = zP[i]; } glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer); @@
-561,7 +589,6 @@ void VisualiserComponent::drawLine(const std::vector& xPoints, const std: lineShader->setUniform("uGain", 450.0f / 512.0f); lineShader->setUniform("uInvert", 1.0f); - float intensity = settings.getIntensity() * (41000.0f / sampleRate); if (settings.getUpsamplingEnabled()) { lineShader->setUniform("uIntensity", intensity); } else { @@ -572,7 +599,7 @@ void VisualiserComponent::drawLine(const std::vector& xPoints, const std: lineShader->setUniform("uNEdges", (GLfloat) nEdges); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertexIndexBuffer); - int nEdgesThisTime = xPoints.size() - 1; + int nEdgesThisTime = xP.size() - 1; glDrawElements(GL_TRIANGLES, nEdgesThisTime * 6, GL_UNSIGNED_INT, 0); glDisableVertexAttribArray(glGetAttribLocation(lineShader->getProgramID(), "aStart")); @@ -598,30 +625,31 @@ void VisualiserComponent::fade() { } void VisualiserComponent::drawCRT() { + using namespace juce::gl; setNormalBlending(); - + activateTargetTexture(blur1Texture); setShader(texturedShader.get()); texturedShader->setUniform("uResizeForCanvas", lineTexture.width / 1024.0f); drawTexture(lineTexture); - + //horizontal blur 256x256 activateTargetTexture(blur2Texture); setShader(blurShader.get()); blurShader->setUniform("uOffset", 1.0f / 256.0f, 0.0f); drawTexture(blur1Texture); - + //vertical blur 256x256 activateTargetTexture(blur1Texture); blurShader->setUniform("uOffset", 0.0f, 1.0f / 256.0f); drawTexture(blur2Texture); - + //preserve blur1 for later activateTargetTexture(blur3Texture); setShader(texturedShader.get()); texturedShader->setUniform("uResizeForCanvas", 1.0f); drawTexture(blur1Texture); - + //horizontal blur 64x64 activateTargetTexture(blur4Texture); setShader(blurShader.get()); @@ -632,19 +660,24 @@ void VisualiserComponent::drawCRT() { activateTargetTexture(blur3Texture); blurShader->setUniform("uOffset", -1.0f / 60.0f, 1.0f / 32.0f); drawTexture(blur4Texture); - - activateTargetTexture(std::nullopt); + + activateTargetTexture(renderTexture); 
setShader(outputShader.get()); float brightness = std::pow(2, settings.getBrightness() - 2); outputShader->setUniform("uExposure", brightness); - outputShader->setUniform("uSaturation", (float) settings.getSaturation()); - outputShader->setUniform("uNoise", (float) settings.getNoise()); + outputShader->setUniform("uSaturation", (float)settings.getSaturation()); + outputShader->setUniform("uNoise", (float)settings.getNoise()); outputShader->setUniform("uTime", time); - outputShader->setUniform("uGlow", (float) settings.getGlow()); + outputShader->setUniform("uGlow", (float)settings.getGlow()); outputShader->setUniform("uResizeForCanvas", lineTexture.width / 1024.0f); juce::Colour colour = juce::Colour::fromHSV(settings.getHue() / 360.0f, 1.0, 1.0, 1.0); outputShader->setUniform("uColour", colour.getFloatRed(), colour.getFloatGreen(), colour.getFloatBlue()); + activateTargetTexture(renderTexture); drawTexture(lineTexture, blur1Texture, blur3Texture, screenTexture); + + activateTargetTexture(std::nullopt); + setShader(texturedShader.get()); + drawTexture(renderTexture); } Texture VisualiserComponent::createScreenTexture() { @@ -747,3 +780,131 @@ void VisualiserComponent::paint(juce::Graphics& g) { g.drawText(text, viewportArea, juce::Justification::centred); } } + +void VisualiserComponent::renderScope(const std::vector& xp, const std::vector& yp, const std::vector& zp) { + if (graticuleEnabled != settings.getGraticuleEnabled() || smudgesEnabled != settings.getSmudgesEnabled()) { + graticuleEnabled = settings.getGraticuleEnabled(); + smudgesEnabled = settings.getSmudgesEnabled(); + screenTexture = createScreenTexture(); + } + + renderScale = (float)openGLContext.getRenderingScale(); + + drawLineTexture(xp, yp, zp); + checkGLErrors("drawLineTexture"); + drawCRT(); + checkGLErrors("drawCRT"); +} +
+/**
+ * Renders an audio file offline, one visualiser frame at a time. Each frame is
+ * drawn through renderScope() and, unless method is FILE_RENDER_DUMMY, the
+ * resulting renderTexture is saved as "sosci export/NNNNNN.png" or ".qoi" in
+ * the directory that contains the audio file.
+ *
+ * Side effects: overwrites sampleRate, oldSampleRate and intensity with values
+ * derived from the file, calls setupArrays(), re-prepares the x/y/z resamplers
+ * and advances time by 0.01 per rendered frame.
+ *
+ * @param sourceAudio .wav or .flac file to render (extension check ignores case)
+ * @param method      FILE_RENDER_DUMMY, FILE_RENDER_PNG or FILE_RENDER_QOI
+ * @param width       currently unused
+ * @param height      currently unused
+ * @return the number of frames rendered (0 if sourceAudio does not exist),
+ *         -1 for an unsupported extension, -2 if the audio cannot be opened or
+ *         read, -3 if the export directory cannot be created
+ */
+int VisualiserComponent::renderAudioFile(juce::File& sourceAudio, int method, int width, int height) {
+    if (!sourceAudio.existsAsFile()) return 0;
+    // hasFileExtension() accepts a semicolon-separated list and compares case-insensitively.
+    if (!sourceAudio.hasFileExtension(".wav;.flac")) return -1;
+
+    juce::AudioFormatManager manager;
+    manager.registerBasicFormats();
+
+    // Owning pointer, so the reader is released on every return path.
+    std::unique_ptr<juce::AudioFormatReader> reader(manager.createReaderFor(sourceAudio));
+    if (reader == nullptr) return -2;
+    // A rate below the frame rate would leave zero samples per frame and divide by zero below.
+    if (reader->sampleRate < FRAME_RATE) return -2;
+
+    juce::AudioSampleBuffer buffer;
+    buffer.setSize(2, static_cast<int>(reader->lengthInSamples), false, false, false);
+    if (!reader->read(&buffer, 0, buffer.getNumSamples(), 0, true, true)) return -2;
+
+    // Make sure the export directory exists before any frame is written into it.
+    const juce::File destDir = sourceAudio.getParentDirectory().getChildFile("sosci export/");
+    if (method != FILE_RENDER_DUMMY && destDir.createDirectory().failed()) return -3;
+
+    const int fileChannels = buffer.getNumChannels();
+    const int fileSamples = buffer.getNumSamples();
+    const double fileSampleRate = reader->sampleRate;
+
+    sampleRate = fileSampleRate;
+    intensity = settings.getIntensity() * 41000.f / sampleRate;
+    oldSampleRate = fileSampleRate;
+    const int frameNSamples = sampleRate / FRAME_RATE;
+    const int frameNSamplesResampled = frameNSamples * RESAMPLE_RATIO;
+    const bool resample = settings.parameters.upsamplingEnabled->getBoolValue();
+    const int frameNOutput = resample ? frameNSamplesResampled : frameNSamples;
+
+    setupArrays(frameNOutput);
+
+    xResampler.prepare(sampleRate, RESAMPLE_RATIO);
+    yResampler.prepare(sampleRate, RESAMPLE_RATIO);
+    zResampler.prepare(sampleRate, RESAMPLE_RATIO);
+
+    // Integer ceiling division: exact for any length, unlike dividing as float.
+    const int nFrames = fileSamples / frameNSamples + (fileSamples % frameNSamples != 0 ? 1 : 0);
+
+    std::vector<float> fileXSamples(frameNSamples);
+    std::vector<float> fileYSamples(frameNSamples);
+    std::vector<float> fileZSamples(frameNSamples);
+
+    std::vector<float> frameXSamples(frameNOutput);
+    std::vector<float> frameYSamples(frameNOutput);
+    std::vector<float> frameZSamples(frameNOutput);
+
+    for (int f = 0; f < nFrames; f++) {
+        for (int s = 0; s < frameNSamples; s++) {
+            // Clamp the read position so the last, partial frame repeats the final sample.
+            const int src = std::min(f * frameNSamples + s, fileSamples - 1);
+            if (fileChannels > 0) fileXSamples[s] = buffer.getSample(0, src);
+            if (fileChannels > 1) fileYSamples[s] = -buffer.getSample(1, src);
+            else fileYSamples[s] = fileXSamples[s];
+            if (fileChannels > 2) fileZSamples[s] = buffer.getSample(2, src);
+            else fileZSamples[s] = 1;
+        }
+
+        if (resample) {
+            xResampler.process(fileXSamples.data(), frameXSamples.data(), frameNSamples);
+            yResampler.process(fileYSamples.data(), frameYSamples.data(), frameNSamples);
+            zResampler.process(fileZSamples.data(), frameZSamples.data(), frameNSamples);
+        }
+        else {
+            frameXSamples = fileXSamples;
+            frameYSamples = fileYSamples;
+            frameZSamples = fileZSamples;
+        }
+
+        renderScope(frameXSamples, frameYSamples, frameZSamples);
+
+        // Zero-pad the frame index to at least six digits for the file name.
+        std::string fileName = std::to_string(f);
+        if (fileName.length() < 6) fileName.insert(0, 6 - fileName.length(), '0');
+
+        switch (method) {
+        case FILE_RENDER_DUMMY:
+            break;
+        case FILE_RENDER_PNG:
+            saveTextureToPNG(renderTexture, destDir.getChildFile(fileName + ".png"));
+            break;
+        case FILE_RENDER_QOI:
+            saveTextureToQOI(renderTexture, destDir.getChildFile(fileName + ".qoi"));
+            break;
+        }
+        time += 0.01f;
+    }
+
+    return nFrames;
+}
\ No newline at end of file diff --git a/Source/visualiser/VisualiserComponent.h b/Source/visualiser/VisualiserComponent.h index fbbba92..7a99709 100644 --- a/Source/visualiser/VisualiserComponent.h +++ b/Source/visualiser/VisualiserComponent.h @@ -7,6 +7,11 @@ #include "../components/SvgButton.h" #include "VisualiserSettings.h"
#include "../components/StopwatchComponent.h" +#include "../img/qoixx.hpp" + +#define FILE_RENDER_DUMMY 0 +#define FILE_RENDER_PNG 1 +#define FILE_RENDER_QOI 2 enum class FullScreenMode { TOGGLE, @@ -58,6 +63,7 @@ public: std::function recordingHalted; private: + float intensity; const double FRAME_RATE = 60.0; bool visualiserOnly; @@ -114,6 +120,7 @@ private: Texture blur2Texture; Texture blur3Texture; Texture blur4Texture; + Texture renderTexture; juce::OpenGLTexture screenOpenGLTexture; juce::Image screenTextureImage = juce::ImageFileFormat::loadFrom(BinaryData::noise_jpg, BinaryData::noise_jpgSize); juce::Image emptyScreenImage = juce::ImageFileFormat::loadFrom(BinaryData::empty_jpg, BinaryData::empty_jpgSize); @@ -142,7 +149,8 @@ private: void setupArrays(int num_points); void setupTextures(); void drawLineTexture(const std::vector& xPoints, const std::vector& yPoints, const std::vector& zPoints); - void saveTextureToFile(GLuint textureID, int width, int height, const juce::File& file); + void saveTextureToPNG(Texture texture, const juce::File& file); + void saveTextureToQOI(Texture texture, const juce::File& file); void activateTargetTexture(std::optional texture); void setShader(juce::OpenGLShaderProgram* program); void drawTexture(std::optional texture0, std::optional texture1 = std::nullopt, std::optional texture2 = std::nullopt, std::optional texture3 = std::nullopt); @@ -153,8 +161,17 @@ private: void drawCRT(); void checkGLErrors(const juce::String& location); void viewportChanged(juce::Rectangle area); + + void renderScope(const std::vector& xPoints, const std::vector& yPoints, const std::vector& zPoints); + int renderAudioFile(juce::File& sourceAudio, int method = 1, int width = 1024, int height = 1024); + Texture createScreenTexture(); + juce::File audioFile; + + std::vector pixels; + const qoixx::qoi::desc imageFormat{ .width = 1024, .height = 1024, .channels = 4, .colorspace = qoixx::qoi::colorspace::srgb }; + 
JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR(VisualiserComponent) };