pico-playground/scanvideo/render/spans.c

455 wiersze
20 KiB
C

/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
#include <string.h>
#include "pico.h"
#include "image.h"
#include "spans.h"
#include "pico/scanvideo/composable_scanline.h"
#ifdef __arm__
#pragma GCC push_options
#pragma GCC optimize("O3")
#endif
// Assertions used on the hot rendering paths; they are only compiled in when
// ENABLE_SPAN_ASSERTIONS is defined.
#ifdef ENABLE_SPAN_ASSERTIONS
#define span_assert(x) assert(x)
#else
// Expands to a void no-op: the disabled assertion is still a valid statement, its argument is
// never evaluated, and it does not provoke "statement with no effect" warnings.
#define span_assert(x) ((void)0)
#endif
/*
 * Common initialisation shared by every span type: zero the whole structure, record the
 * flags / visible width / type, and hook the span onto the end of `prev` when one is given.
 */
static inline void
init_span(struct span *span, uint8_t type, uint16_t flags, uint16_t visible_width, struct span *prev) {
    memset(span, 0, sizeof *span);
    span->flags = flags;
    span->width = visible_width;
    span->type = type;
    if (prev != NULL) {
        // append to the list being built
        prev->next = span;
    }
}
/*
 * Initialise `span` as a solid colour span of `width` pixels filled with `color16`,
 * linking it after `prev` when `prev` is non-NULL.
 */
void init_solid_color_span(struct span *span, uint16_t width, uint16_t color16, struct span *prev) {
    const uint8_t span_type = SPAN_SOLID;
    const uint16_t span_flags = CF_HAS_OPAQUE;

    init_span(span, span_type, span_flags, width, prev);
    set_solid_color_span_color(span, color16);
}
/*
 * Initialise `span` as a 4-bit "vogon" encoded span.
 *
 * The span starts out unclipped: clip_left is 0 (from the zeroing in init_span) and the visible
 * width equals `content_width`. The span's flags are taken from the palette's opacity bits, and
 * the palette is expected to be opaque (asserted).
 */
void init_vogon_4bit_span(struct span *span, uint16_t content_width, const uint8_t *encoding, uint16_t encoded_size,
                          struct palette16 *palette, struct span *prev) {
    const uint16_t opacity_flags = palette->flags & CF_OPACITY_MASK;

    init_span(span, SPAN_4BIT_VOGON_OPAQUE, opacity_flags, content_width, prev);
    set_vogon_4bit_span_encoding(span, encoding, encoded_size);
    span->vogon.content_width = content_width;
    span->vogon.palette = palette;
    // palette should be opaque
    assert(CF_HAS_OPAQUE == (palette->flags & CF_OPACITY_MASK));
}
/*
 * Replace the fill colour of a solid span. The span must be of type SPAN_SOLID (asserted).
 */
void __time_critical_func(set_solid_color_span_color)(struct span *span, uint16_t color16) {
    assert(span->type == SPAN_SOLID);

    span->solid.color16 = color16;
}
/*
 * Point a vogon span at its encoded pixel data. Only the pointer and length are stored; the
 * data itself is not copied. The span must be of type SPAN_4BIT_VOGON_OPAQUE (asserted).
 */
void __time_critical_func(set_vogon_4bit_span_encoding)(struct span *span, const uint8_t *data, uint16_t data_length) {
    assert(span->type == SPAN_4BIT_VOGON_OPAQUE);

    span->vogon.data_length = data_length;
    span->vogon.data = data;
}
/*
 * Select the visible window of a vogon span: `clip_left` content pixels are skipped and
 * `display_width` pixels are shown. The window must lie inside the span's content width, and
 * the span must be of type SPAN_4BIT_VOGON_OPAQUE (all asserted).
 */
void __time_critical_func(set_vogon_4bit_clipping)(struct span *span, int clip_left, int display_width) {
    // preconditions
    assert(span->type == SPAN_4BIT_VOGON_OPAQUE);
    assert(clip_left >= 0);
    assert(display_width >= 0); // todo should we allow this? probably
    assert(clip_left + display_width <= span->vogon.content_width);

    // the visible width lives in the common part of the span, the skip count in the vogon part
    span->vogon.clip_left = clip_left;
    span->width = display_width;
}
// Shortest run (in pixels) that this file emits as a color-run token; the token's length field
// is written as (run length - MIN_COLOR_RUN).
// todo needs to be shared - currently the same as GAP_SKIPPED_PIXELS as it happens
#define MIN_COLOR_RUN 3
// todo allow for chained DMA (indeed, we may have a pool of small fixed-size chunks (say 64 words) we can re-use for scanlines anyway) - a big scanline could use more than one
// todo but we can simply split our rendering across them (and link them into the chain)... this will make it easier to join in raw data etc.
// todo simple span allocation
/**
 * Fill `buf` with a scanline made of one run of a single color.
 *
 * Exactly three 32-bit words are written, each holding two 16-bit values (low half | high half):
 *   buf[0] = COMPOSABLE_COLOR_RUN   | color16
 *   buf[1] = width - MIN_COLOR_RUN  | COMPOSABLE_RAW_1P
 *   buf[2] = 0 (black pixel)        | COMPOSABLE_EOL_ALIGN
 *
 * @param buf destination buffer
 * @param buf_length capacity of `buf` in 32-bit words; must be at least 3 (asserted)
 * @param width run length in pixels; must be at least MIN_COLOR_RUN (asserted)
 * @param color16 color of the run; it is shifted into the upper 16 bits of buf[0]
 * @return the number of 32-bit words written (always 3)
 */
int32_t __time_critical_func(single_color_scanline)(uint32_t *buf, size_t buf_length, int width, uint32_t color16) {
    // three words are written below, so the buffer must hold at least three
    assert(buf_length >= 3);
    assert(width >= MIN_COLOR_RUN);
    // | jmp color_run | color | count-3 | buf[0] =
    buf[0] = COMPOSABLE_COLOR_RUN | (color16 << 16);
    buf[1] = (width - MIN_COLOR_RUN) | (COMPOSABLE_RAW_1P << 16);
    // note we must end with a black pixel
    buf[2] = 0 | (COMPOSABLE_EOL_ALIGN << 16);
    return 3;
}
// Emit `count` palette-mapped pixels from 4-bit packed data where both the first and the last
// encoded byte are used in full ("ff"), so `count` must be even and > 0. Each byte holds two
// pixels: low nibble first, then high nibble. Two pixels go out as COMPOSABLE_RAW_2P; longer runs
// go out as COMPOSABLE_RAW_RUN with (count - 3) written after the first pixel.
// `output` and `encoding` are advanced in place.
#define output_4bit_paletted_pixels_ff(output, palette_entries, encoding, count) do { \
    span_assert((count)>0); \
    span_assert(!((count)&1)); \
    int todo_ = (count); \
    uint32_t pair_ = *encoding++; \
    if (todo_ <= 2) { \
        *output++ = COMPOSABLE_RAW_2P; \
        *output++ = palette_entries[pair_ & 0xf]; \
        *output++ = palette_entries[pair_ >> 4]; \
    } else { \
        *output++ = COMPOSABLE_RAW_RUN; \
        *output++ = palette_entries[pair_ & 0xf]; \
        *output++ = todo_ - 3; \
        *output++ = palette_entries[pair_ >> 4]; \
        for (todo_ -= 2; todo_ > 0; todo_ -= 2) { \
            pair_ = *encoding++; \
            *output++ = palette_entries[pair_ & 0xf]; \
            *output++ = palette_entries[pair_ >> 4]; \
        } \
    } \
} while (0)
// Emit `count` (> 0) palette-mapped pixels from 4-bit packed data where the first encoded byte is
// used in full but the run may stop half way through the last one ("fx"): for an odd `count` only
// the low nibble of the final byte is output. Each byte holds two pixels, low nibble first.
// One pixel goes out as COMPOSABLE_RAW_1P, two as COMPOSABLE_RAW_2P, and longer runs as
// COMPOSABLE_RAW_RUN with (count - 3) written after the first pixel.
// `output` and `encoding` are advanced in place; only the bytes actually read are consumed.
// `count` is evaluated once into a local, so any integer expression may be passed safely.
#define output_4bit_paletted_pixels_fx(output, palette_entries, encoding, count) do { \
    span_assert((count)>0); \
    const int total_ = (count); \
    uint32_t pair_ = *encoding++; \
    if (total_ > 2) { \
        *output++ = COMPOSABLE_RAW_RUN; \
        *output++ = palette_entries[pair_ & 0xf]; \
        *output++ = total_ - 3; \
        *output++ = palette_entries[pair_ >> 4]; \
        int left_ = total_; \
        while (1 < (left_ = left_ - 2)) { \
            pair_ = *encoding++; \
            *output++ = palette_entries[pair_ & 0xf]; \
            *output++ = palette_entries[pair_ >> 4]; \
        } \
        if (total_ & 1) { \
            pair_ = *encoding++; \
            *output++ = palette_entries[pair_ & 0xf]; \
        } \
    } else if (total_ == 1) { \
        *output++ = COMPOSABLE_RAW_1P; \
        *output++ = palette_entries[pair_ & 0xf]; \
    } else { \
        *output++ = COMPOSABLE_RAW_2P; \
        *output++ = palette_entries[pair_ & 0xf]; \
        *output++ = palette_entries[pair_ >> 4]; \
    } \
} while (0)
// Stand-in for output_4bit_paletted_pixels_xf that emits no pixels and only advances `encoding`
// by (count+1)>>1 bytes. It is not referenced in the visible source (presumably a debugging aid).
#define XXoutput_4bit_paletted_pixels_xf(output, palette_entries, encoding, count) encoding += ((count+1)>>1)
// Emit `count` (> 0) palette-mapped pixels from 4-bit packed data where the run may start half way
// through the first encoded byte but uses the last byte in full ("xf"): for an odd `count` the low
// nibble of the first byte is skipped and output starts with its high nibble.
// One pixel goes out as COMPOSABLE_RAW_1P, two as COMPOSABLE_RAW_2P, and longer runs as
// COMPOSABLE_RAW_RUN with (count - 3) written after the first pixel.
// `output` and `encoding` are advanced in place.
#define output_4bit_paletted_pixels_xf(output, palette_entries, encoding, count) do { \
    span_assert((count)>0); \
    const int total_ = (count); \
    uint32_t pair_ = *encoding++; \
    if (total_ > 2) { \
        *output++ = COMPOSABLE_RAW_RUN; \
        if (!(total_ & 1)) { \
            *output++ = palette_entries[pair_ & 0xf]; \
            *output++ = total_ - 3; \
            *output++ = palette_entries[pair_ >> 4]; \
        } else { \
            *output++ = palette_entries[pair_ >> 4]; \
            *output++ = total_ - 3; \
        } \
        for (int left_ = (total_ - 1) >> 1; left_ != 0; left_--) { \
            pair_ = *encoding++; \
            *output++ = palette_entries[pair_ & 0xf]; \
            *output++ = palette_entries[pair_ >> 4]; \
        } \
    } else if (total_ == 1) { \
        *output++ = COMPOSABLE_RAW_1P; \
        *output++ = palette_entries[pair_ >> 4]; \
    } else { \
        *output++ = COMPOSABLE_RAW_2P; \
        *output++ = palette_entries[pair_ & 0xf]; \
        *output++ = palette_entries[pair_ >> 4]; \
    } \
} while (0)
// Emit one pixel of `color` as a COMPOSABLE_RAW_1P token followed by the color value.
// `output` is advanced by two 16-bit entries.
#define output_color_one_pixel(output, color) do { \
    *output++ = COMPOSABLE_RAW_1P; \
    *output++ = color; \
} while (0)
// Emit two pixels of the same `color` as a COMPOSABLE_RAW_2P token followed by the color twice.
// `output` is advanced by three 16-bit entries.
#define output_color_two_pixels(output, color) do { \
    *output++ = COMPOSABLE_RAW_2P; \
    output[0] = color; \
    output[1] = color; \
    output += 2; \
} while (0)
// Emit a run of `run_length` pixels of `color` as a COMPOSABLE_COLOR_RUN token, the color, and
// then (run_length - MIN_COLOR_RUN). The run must be at least MIN_COLOR_RUN pixels long.
// `output` is advanced by three 16-bit entries.
#define output_color_run_as_run_length(output, color, run_length) do { \
    span_assert(run_length >= MIN_COLOR_RUN); \
    output[0] = COMPOSABLE_COLOR_RUN; \
    output[1] = color; \
    output[2] = (run_length) - MIN_COLOR_RUN; \
    output += 3; \
} while (0)
// Emit a color run whose length the caller already knows to be at least MIN_COLOR_RUN, so no
// short-run handling is needed; this simply forwards to output_color_run_as_run_length.
#define output_color_run_of_min_size(output, color, run_length) do { \
    span_assert(run_length >= MIN_COLOR_RUN); \
    output_color_run_as_run_length(output, color, run_length); \
} while (0)
// Emit `run_length` pixels of `color` using the token that fits the length: a color run for three
// or more pixels, otherwise the dedicated one- or two-pixel tokens. Any other length (zero or
// negative) is a caller error and trips assert(false).
#define output_color_run_of_any_size(output, color, run_length) do { \
    if ((run_length) >= 3) { \
        output_color_run_as_run_length(output, color, run_length); \
    } else if ((run_length) == 1) { \
        output_color_one_pixel(output, color); \
    } else if ((run_length) == 2) { \
        output_color_two_pixels(output, color); \
    } else { \
        assert(false); \
    } \
} while (0)
/**
 * Render a linked list of spans into a scanline buffer.
 *
 * Spans are visited in list order until the list ends or `width` pixels have been accounted for;
 * zero-width spans are skipped and a span that would overrun `width` is trimmed to fit. Solid
 * spans are emitted as color runs. 4-bit vogon spans are decoded from their run encoding, first
 * discarding `clip_left` pixels and then emitting either the whole remaining line or, when the
 * span is right clipped, only the visible pixel count. A vogon span with a non-zero clip_left
 * must not also be right clipped (asserted). After the last span a single zero pixel and an
 * end-of-line token are appended so that the output ends on a 32-bit boundary.
 *
 * This method is kinda ugly, but really needs to be fast - C++ and particular templates and references could probably make it better
 * but still, this will probably want to be assembly anyway. For now cut and paste code rather than sub-method fragments to string together...
 * assembly being good for state machines!
 *
 * Actually I've started to move some common stuff/loops out into static inline functions that we can hopefully _asm-ify in the short term
 *
 * @param render_spans_buffer destination buffer; must be 32-bit aligned (asserted)
 * @param max_words capacity of the buffer in 32-bit words; only checked by an assert once rendering is complete
 * @param head first span of the list; may be NULL, in which case only the terminator is written
 * @param width maximum number of pixels to render
 * @return the number of 32-bit words written, or -1 if an unrecognised encoding byte is met
 */
int32_t __time_critical_func(render_spans)(uint32_t *render_spans_buffer, size_t max_words, struct span *head,
                                           int width) {
    // the buffer is filled 16 bits at a time
    uint16_t *output = (uint16_t *) render_spans_buffer;
    assert(!(3u & (uintptr_t) output)); // should be dword aligned
#ifndef NDEBUG
    // todo output_end
    uint16_t *output_end = output + 2 * max_words;
#endif
    int total_pixels_remaining = width;
    for (const struct span *cur = head; cur && total_pixels_remaining > 0; cur = cur->next) {
        int local_pixels_remaining = cur->width;
        if (!local_pixels_remaining) continue;
        total_pixels_remaining -= local_pixels_remaining;
        if (total_pixels_remaining < 0) {
            // this span overruns the requested width, so trim it to what still fits
            local_pixels_remaining += total_pixels_remaining;
        }
        // todo i think this is reasonable, since for it to be 0 we'd have to have pixels_remaining == 0
        span_assert(local_pixels_remaining > 0);
        if (cur->type == SPAN_SOLID) {
            // no hard clipping work; we just output what we're told
            uint16_t color = cur->solid.color16;
            output_color_run_of_any_size(output, color, local_pixels_remaining);
        } else if (cur->type == SPAN_4BIT_VOGON_OPAQUE) {
            int skip_pixels_remaining = cur->vogon.clip_left;
            int right_clipped_pixels = cur->vogon.content_width - skip_pixels_remaining - local_pixels_remaining;
            const uint16_t *palette_entries = cur->vogon.palette->entries;
            const uint8_t *encoding = cur->vogon.data;
            uint8_t c;
            // deal with the skip pixels if any (do the whole rendering loop here, because it has been adulterated
            // with code to check for clipping
            while (skip_pixels_remaining > 0) {
                c = *encoding++;
                // -------------------------------
                // this variant skips a run which is wholly inside the clip_left
                // or does a partially clipped span (which may be both left and right clipped)
                // -------------------------------
                assert(right_clipped_pixels == 0); // can't do that here for now
                if (RAW_PIXELS_SHORT == (c & 0xc0)) {
                    // count is already pairs of pixels count
                    int pair_count = ((c & 0x3f) + 1);
                    int run_length = pair_count << 1;
                    const uint8_t *end = encoding + pair_count;
                    if (skip_pixels_remaining < run_length) {
                        // step over the wholly skipped bytes; an odd skip leaves a half-used byte
                        // which the _xf variant handles
                        encoding += skip_pixels_remaining >> 1;
                        run_length -= skip_pixels_remaining;
                        output_4bit_paletted_pixels_xf(output, palette_entries, encoding, run_length);
                        skip_pixels_remaining = 0;
                    } else {
                        // wholly clipped
                        skip_pixels_remaining -= run_length;
                        encoding = end;
                    }
                    span_assert(encoding == end);
                } else if (COLOR_PIXELS_SHORT == (c & 0xc0)) {
                    int run_length = ((c & 0x3f) + MIN_COLOR_SPAN_4BIT);
                    skip_pixels_remaining -= run_length;
                    if (skip_pixels_remaining < 0) {
                        // the run extends past the clip; emit only the visible remainder
                        run_length = -skip_pixels_remaining;
                        span_assert(run_length > 0);
                        uint16_t color = palette_entries[*encoding++];
                        output_color_run_of_any_size(output, color, run_length);
                    } else {
                        // wholly clipped: skip the color byte
                        encoding++;
                    }
                } else if (SINGLE_PIXEL == (c & 0xf0)) {
                    // if we are clipped, then there is nothing to do (no pixels left)
                    skip_pixels_remaining--;
                } else if (c == COLOR_PIXELS_LONG) {
                    // 16-bit length (low byte first), stored as length - 1
                    int run_length = 1 + *encoding++;
                    run_length += (*encoding++ << 8);
                    skip_pixels_remaining -= run_length;
                    if (skip_pixels_remaining < 0) {
                        run_length = -skip_pixels_remaining;
                        span_assert(run_length > 0);
                        uint16_t color = palette_entries[*encoding++];
                        output_color_run_of_any_size(output, color, run_length);
                    } else {
                        encoding++;
                    }
                } else if (c == RAW_PIXELS_LONG) {
                    int run_length = 1 + *encoding++;
                    run_length += (*encoding++ << 8);
                    span_assert(!(run_length & 1)); // we always have even numbers of pixels
                    // two pixels per encoded byte, so the run's data stops here
                    const uint8_t *end = encoding + (run_length >> 1);
                    if (skip_pixels_remaining < run_length) {
                        encoding += skip_pixels_remaining >> 1;
                        run_length -= skip_pixels_remaining;
                        output_4bit_paletted_pixels_xf(output, palette_entries, encoding, run_length);
                        skip_pixels_remaining = 0;
                    } else {
                        // wholly clipped
                        skip_pixels_remaining -= run_length;
                        encoding = end;
                    }
                    span_assert(encoding == end);
                } else if (c == END_OF_LINE) {
                    // just pass it on, though we could do some assertiony stuff here
                    encoding--;
                    break;
                } else {
                    return -1;
                }
            }
            if (!right_clipped_pixels) {
                // -------------------------------
                // here we do entirely unclipped runs from now on, without having to bother
                // with book-keeping
                // -------------------------------
                while (true) {
                    c = *encoding++;
                    if (RAW_PIXELS_SHORT == (c & 0xc0)) {
                        // count is pairs of pixels
                        int run_length = ((c & 0x3f) + 1) * 2;
                        output_4bit_paletted_pixels_ff(output, palette_entries, encoding, run_length);
                    } else if (COLOR_PIXELS_SHORT == (c & 0xc0)) {
                        int run_length = ((c & 0x3f) + MIN_COLOR_SPAN_4BIT);
                        uint16_t color = palette_entries[*encoding++];
                        output_color_run_of_min_size(output, color, run_length);
                    } else if (SINGLE_PIXEL == (c & 0xf0)) {
                        uint16_t color = palette_entries[c & 0xf];
                        output_color_one_pixel(output, color);
                    } else if (c == COLOR_PIXELS_LONG) {
                        int run_length = 1 + *encoding++;
                        run_length += (*encoding++) << 8;
                        uint16_t color = palette_entries[*encoding++];
                        output_color_run_of_min_size(output, color, run_length);
                    } else if (c == RAW_PIXELS_LONG) {
                        int run_length = 1 + *encoding++;
                        run_length += (*encoding++) << 8;
                        assert(!(run_length & 1)); // we always have even numbers of pixels
                        output_4bit_paletted_pixels_ff(output, palette_entries, encoding, run_length);
                    } else if (c == END_OF_LINE) {
                        break;
                    } else {
                        return -1;
                    }
                }
            } else {
                span_assert(right_clipped_pixels > 0); // should not be negative ever
                span_assert(local_pixels_remaining > 0); // believe this is impossible
                // similar to the regular loop but we must track local_pixels_remaining;
                while (local_pixels_remaining > 0) {
                    c = *encoding++;
                    if (RAW_PIXELS_SHORT == (c & 0xc0)) {
                        // count is already pairs of pixels count
                        int pair_count = ((c & 0x3f) + 1);
                        const uint8_t *end = encoding + pair_count;
                        int run_length = pair_count * 2;
                        local_pixels_remaining -= run_length;
                        if (local_pixels_remaining >= 0) {
                            output_4bit_paletted_pixels_ff(output, palette_entries, encoding, run_length);
                        } else {
                            // the run crosses the right clip edge: emit only the visible part
                            run_length += local_pixels_remaining;
                            span_assert(run_length >= 0);
                            output_4bit_paletted_pixels_fx(output, palette_entries, encoding, run_length);
                            encoding = end;
                        }
                        span_assert(encoding == end);
                    } else if (COLOR_PIXELS_SHORT == (c & 0xc0)) {
                        int run_length = ((c & 0x3f) + MIN_COLOR_SPAN_4BIT);
                        uint16_t color = palette_entries[*encoding++];
                        local_pixels_remaining -= run_length;
                        // todo collapse these into a single call?
                        if (local_pixels_remaining < 0) {
                            run_length += local_pixels_remaining;
                            output_color_run_of_any_size(output, color, run_length);
                        } else {
                            output_color_run_of_min_size(output, color, run_length);
                        }
                    } else if (SINGLE_PIXEL == (c & 0xf0)) {
                        uint16_t color = palette_entries[c & 0xf];
                        // since the span is not clipped its one pixel must not be
                        output_color_one_pixel(output, color);
                        local_pixels_remaining--;
                    } else if (c == COLOR_PIXELS_LONG) {
                        int run_length = 1 + *encoding++;
                        run_length += (*encoding++) << 8;
                        local_pixels_remaining -= run_length;
                        uint16_t color = palette_entries[*encoding++];
                        // todo collapse these into a single call? more so because this is a long run
                        if (local_pixels_remaining < 0) {
                            run_length += local_pixels_remaining;
                            output_color_run_of_any_size(output, color, run_length);
                        } else {
                            output_color_run_of_min_size(output, color, run_length);
                        }
                    } else if (c == RAW_PIXELS_LONG) {
                        int run_length = 1 + *encoding++;
                        run_length += (*encoding++) << 8;
                        assert(!(run_length & 1)); // we always have even numbers of pixels
                        const uint8_t *end = encoding + (run_length >> 1);
                        local_pixels_remaining -= run_length;
                        if (local_pixels_remaining >= 0) {
                            output_4bit_paletted_pixels_ff(output, palette_entries, encoding, run_length);
                        } else {
                            run_length += local_pixels_remaining;
                            span_assert(run_length >= 0);
                            output_4bit_paletted_pixels_fx(output, palette_entries, encoding, run_length);
                            encoding = end;
                        }
                        span_assert(encoding == end);
                    } else if (c == END_OF_LINE) {
                        break;
                    } else {
                        return -1;
                    }
                }
            }
        }
    }
    // finish with a single zero pixel followed by an end-of-line token
    *output++ = COMPOSABLE_RAW_1P;
    *output++ = 0;
    if (2u & (uintptr_t) output) {
        // we are unaligned
        *output++ = COMPOSABLE_EOL_ALIGN;
    } else {
        *output++ = COMPOSABLE_EOL_SKIP_ALIGN;
        *output++ = 0xffff; // eye catcher
    }
    assert(output <= output_end);
    assert(0 == (3u & (uintptr_t) output));
    return ((uint32_t *) output) - render_spans_buffer;
}
#ifdef __arm__
#pragma GCC pop_options
#endif