Optimize ili9488 driver

I implemented several code optimizations from
a presentation by Damien George that I found on YouTube.
The optimizations are on a slide here:
https://youtu.be/hHec4qL00x0?t=813

1) Change __init__ to use bit 5 in the MADCTL register
   to swap row/column for landscape mode.

2) Eliminate functions _lscopy and _lscopy_gs.
   They are not needed because the ili9488 chip
   is handling orientation.
   This simplifies show() and
   do_refresh().

3) Minor optimizations to _lcopy and _lcopy_gs.
   Process the bytes in reverse order.
   Specific changes:
   - change while condition from 'x < length' to
     simply 'length'. This test is faster in viper.
   - use length as index into source instead of x.
     Variable x is removed.
     This requires the source and dest to be
     processed from end to beginning.
   - other optimizations from the PyCon talk
     given by Damien George.

4) Optimizations to show() and do_refresh().
   - Caching in local variables
   - Changes to support writing more pixels
     for each call to spi.write().
     The savings from this are larger on ESP32
     than on Pico and Pico 2.

The Damien George optimizations save 10 to 20 milliseconds
per screen refresh on my ESP32. I expect similar results
for Pico and Pico 2.

The saving from fewer calls to spi.write() is significant
on my ESP32: 60 to 80 milliseconds.
The savings on Pico and Pico 2 are smaller: 10 to 20 milliseconds.
pull/101/head
Carl Pottle 2025-04-16 15:43:35 -07:00
rodzic cd5210a132
commit f6c3cf29bb
1 zmienionych plików z 102 dodań i 184 usunięć

Wyświetl plik

@ -22,122 +22,64 @@ import framebuf
import asyncio import asyncio
from drivers.boolpalette import BoolPalette from drivers.boolpalette import BoolPalette
# Do processing from end to beginning for
# Portrait mode greyscale # small performance improvement.
# greyscale
@micropython.viper @micropython.viper
def _lcopy_gs(dest: ptr8, source: ptr8, length: int): def _lcopy_gs(dest: ptr8, source: ptr8, length: int) :
# rgb666 - 18bit/pixel # rgb666 - 18bit/pixel
n: int = 0 n: int = length * 6 - 1
x: int = 0 while length:
while x < length: length -= 1
c: uint = source[x] c : uint = source[length]
# Store the index in the 4 high order bits # Store the index in the 4 high order bits
p: uint = c & 0xF0 # current pixel p : uint = c & 0xF0 # current pixel
q: uint = c << 4 # next pixel q : uint = c << 4 # next pixel
dest[n] = p
n += 1
dest[n] = p
n += 1
dest[n] = p
n += 1
dest[n] = q dest[n] = q
n += 1 n -= 1
dest[n] = q dest[n] = q
n += 1 n -= 1
dest[n] = q dest[n] = q
n += 1 n -= 1
x += 1 dest[n] = p
n -= 1
dest[n] = p
n -= 1
dest[n] = p
n -= 1
# Do processing from end to beginning for
# Portrait mode color # small performance improvement.
# color
@micropython.viper @micropython.viper
def _lcopy(dest: ptr8, source: ptr8, lut: ptr16, length: int): def _lcopy(dest: ptr8, source: ptr8, lut: ptr16, length: int) :
# Convert lut rgb 565 to rgb666 # Convert lut rgb 565 to rgb666
n: int = 0 n: int = length * 6 - 1
x: int = 0 while length:
while x < length: length -= 1
c: uint = source[x] c : uint = source[length]
p: uint = c >> 4 # current pixel
q = c & 0x0F # next pixel
v: uint16 = lut[p] v = lut[c & 0x0F] # next pixel
dest[n] = (v & 0xF800) >> 8 # R
n += 1
dest[n] = (v & 0x07E0) >> 3 # G
n += 1
dest[n] = (v & 0x001F) << 3 # B dest[n] = (v & 0x001F) << 3 # B
n += 1 n -= 1
v = lut[q]
dest[n] = (v & 0xF800) >> 8 # R
n += 1
dest[n] = (v & 0x07E0) >> 3 # G dest[n] = (v & 0x07E0) >> 3 # G
n += 1 n -= 1
dest[n] = (v & 0x001F) << 3 # B
n += 1
x += 1
# FB is in landscape mode greyscale
@micropython.viper
def _lscopy_gs(dest: ptr8, source: ptr8, ch: int):
col = ch & 0x1FF # Unpack (viper old 4 parameter limit)
height = (ch >> 9) & 0x1FF
wbytes = ch >> 19 # Width in bytes is width // 2
# rgb666 - 18bit/pixel
n = 0
clsb = col & 1
idx = col >> 1 # 2 pixels per byte
while height:
if clsb:
c = source[idx] << 4
else:
c = source[idx] & 0xF0
dest[n] = c
n += 1
dest[n] = c
n += 1
dest[n] = c
n += 1
idx += wbytes
height -= 1
# FB is in landscape mode color, hence issue a column at a time to portrait mode hardware.
@micropython.viper
def _lscopy(dest: ptr8, source: ptr8, lut: ptr16, ch: int):
# Convert lut rgb 565 to rgb666
col = ch & 0x1FF # Unpack (viper old 4 parameter limit)
height = (ch >> 9) & 0x1FF
wbytes = ch >> 19 # Width in bytes is width // 2
n = 0
clsb = col & 1
idx = col >> 1 # 2 pixels per byte
while height:
if clsb:
c = source[idx] & 0x0F
else:
c = source[idx] >> 4
v: uint16 = lut[c]
dest[n] = (v & 0xF800) >> 8 # R dest[n] = (v & 0xF800) >> 8 # R
n += 1 n -= 1
dest[n] = (v & 0x07E0) >> 3 # G
n += 1 v : uint = lut[c >> 4] # current pixel
dest[n] = (v & 0x001F) << 3 # B dest[n] = (v & 0x001F) << 3 # B
n += 1 n -= 1
dest[n] = (v & 0x07E0) >> 3 # G
idx += wbytes n -= 1
height -= 1 dest[n] = (v & 0xF800) >> 8 # R
n -= 1
class ILI9488(framebuf.FrameBuffer): class ILI9488(framebuf.FrameBuffer):
lut = bytearray(32) lut = bytearray(32)
COLOR_INVERT = 0 COLOR_INVERT = 0
# Convert r, g, b in range 0-255 to a 16 bit colour value # Convert r, g, b in range 0-255 to a 16 bit colour value
@ -149,7 +91,8 @@ class ILI9488(framebuf.FrameBuffer):
# Transpose width & height for landscape mode # Transpose width & height for landscape mode
def __init__( def __init__(
self, spi, cs, dc, rst, height=320, width=480, usd=False, mirror=False, init_spi=False self, spi, cs, dc, rst, height=320, width=480, usd=False, mirror=False, init_spi=False,
lines_per_write=4
): ):
self._spi = spi self._spi = spi
self._cs = cs self._cs = cs
@ -158,17 +101,21 @@ class ILI9488(framebuf.FrameBuffer):
self.lock_mode = False # If set, user lock is passed to .do_refresh self.lock_mode = False # If set, user lock is passed to .do_refresh
self.height = height # Logical dimensions for GUIs self.height = height # Logical dimensions for GUIs
self.width = width self.width = width
self._long = max(height, width) # Physical dimensions of screen and aspect ratio
self._short = min(height, width)
self._spi_init = init_spi self._spi_init = init_spi
self._gscale = False # Interpret buffer as index into color LUT self._gscale = False # Interpret buffer as index into color LUT
self.mode = framebuf.GS4_HMSB self.mode = framebuf.GS4_HMSB
self.palette = BoolPalette(self.mode) self.palette = BoolPalette(self.mode)
#
# lines_per_write must divide evenly into height
#
if (self.height % lines_per_write) != 0 :
raise ValueError('lines_per_write invalid')
self._lines_per_write=lines_per_write
gc.collect() gc.collect()
buf = bytearray(height * width // 2) buf = bytearray(height * width // 2)
self.mvb = memoryview(buf) self.mvb = memoryview(buf)
super().__init__(buf, width, height, self.mode) # Logical aspect ratio super().__init__(buf, width, height, self.mode) # Logical aspect ratio
self._linebuf = bytearray(self._short * 3) self._linebuf = bytearray(self._lines_per_write*self.width * 3)
# Hardware reset # Hardware reset
self._rst(0) self._rst(0)
@ -185,18 +132,18 @@ class ILI9488(framebuf.FrameBuffer):
self._wcmd(b"\x11") # sleep out self._wcmd(b"\x11") # sleep out
sleep_ms(20) sleep_ms(20)
self._wcd(b"\x3a", b"\x66") # interface pixel format 18 bits per pixel self._wcd(b"\x3a", b"\x66") # interface pixel format 18 bits per pixel
# Normally use defaults. This allows it to work on the Waveshare board with a
# shift register. If size is not 320x480 assume no shift register. self._wcd(b"\x2a", int.to_bytes(self.width - 1, 4, "big"))
# Default column address start == 0, end == 0x13F (319) self._wcd(b"\x2b", int.to_bytes(self.height - 1, 4, "big")) # SET_PAGE ht
if self._short != 320: # Not the Waveshare board: no shift register
self._wcd(b"\x2a", int.to_bytes(self._short - 1, 4, "big")) if self.width > self.height :
# Default page address start == 0 end == 0x1DF (479) # landscape
if self._long != 480: madctl = 0xe8 if usd else 0x28
self._wcd(b"\x2b", int.to_bytes(self._long - 1, 4, "big")) # SET_PAGE ht else :
# self._wcd(b"\x36", b"\x48" if usd else b"\x88") # MADCTL: RGB portrait mode #portrait
madctl = 0x48 if usd else 0x88 madctl = 0x48 if usd else 0x88
if mirror: if mirror:
madctl ^= 0x80 madctl ^= 0x80 # toggle MY
self._wcd(b"\x36", madctl.to_bytes(1, "big")) # MADCTL: RGB portrait mode self._wcd(b"\x36", madctl.to_bytes(1, "big")) # MADCTL: RGB portrait mode
self._wcmd(b"\x11") # sleep out self._wcmd(b"\x11") # sleep out
self._wcmd(b"\x29") # display on self._wcmd(b"\x29") # display on
@ -226,7 +173,6 @@ class ILI9488(framebuf.FrameBuffer):
# @micropython.native # Made almost no difference to timing # @micropython.native # Made almost no difference to timing
def show(self): # Physical display is in portrait mode def show(self): # Physical display is in portrait mode
clut = ILI9488.lut
lb = self._linebuf lb = self._linebuf
buf = self.mvb buf = self.mvb
cm = self._gscale # color False, greyscale True cm = self._gscale # color False, greyscale True
@ -235,30 +181,22 @@ class ILI9488(framebuf.FrameBuffer):
self._wcmd(b"\x2c") # WRITE_RAM self._wcmd(b"\x2c") # WRITE_RAM
self._dc(1) self._dc(1)
self._cs(0) self._cs(0)
if self.width < self.height: # Portrait 350 ms on ESP32 160 MHz, 26.6 MHz SPI clock wd = self.width >> 1
wd = self.width // 2
ht = self.height ht = self.height
if cm: spi_write = self._spi.write
for start in range(0, wd * ht, wd): # For each line length = self._lines_per_write*wd
_lcopy_gs(lb, buf[start:], wd) # Copy greyscale r = range(0, wd * ht, length)
self._spi.write(lb) if cm :
else: lcopy = _lcopy_gs # Copy greyscale
for start in range(0, wd * ht, wd): # For each line for start in r : # For each line
_lcopy(lb, buf[start:], clut, wd) # Copy and map colors lcopy(lb, buf[start:], length)
self._spi.write(lb) spi_write(lb)
else: # Landscape 370 ms on ESP32 160 MHz, 26.6 MHz SPI clock else :
width = self.width clut = ILI9488.lut
wd = width - 1 lcopy = _lcopy # Copy and map colors
cargs = (self.height << 9) + (width << 18) # Viper 4-arg limit for start in r : # For each line
if cm: lcopy(lb, buf[start:], clut, length)
for col in range(width): # For each column of landscape display spi_write(lb)
_lscopy_gs(lb, buf, wd - col + cargs) # Copy greyscale
self._spi.write(lb)
else:
for col in range(width): # For each column of landscape display
_lscopy(lb, buf, clut, wd - col + cargs) # Copy and map colors
self._spi.write(lb)
self._cs(1) self._cs(1)
def short_lock(self, v=None): def short_lock(self, v=None):
@ -272,58 +210,38 @@ class ILI9488(framebuf.FrameBuffer):
if elock is None: if elock is None:
elock = asyncio.Lock() elock = asyncio.Lock()
async with self._lock: async with self._lock:
lines, mod = divmod(self._long, split) # Lines per segment lines, mod = divmod(self.height, split) # Lines per segment
if mod: if mod:
raise ValueError("Invalid do_refresh arg.") raise ValueError("Invalid do_refresh arg 'split'")
if lines % self._lines_per_write != 0 :
raise ValueError("Invalid do_refresh arg 'split' for lines_per_write of %d" %(self._lines_per_write))
clut = ILI9488.lut clut = ILI9488.lut
lb = self._linebuf lb = self._linebuf
buf = self.mvb buf = self.mvb
cm = self._gscale # color False, greyscale True cm = self._gscale # color False, greyscale True
self._wcmd(b"\x2c") # WRITE_RAM self._wcmd(b"\x2c") # WRITE_RAM
self._dc(1) self._dc(1)
if self.width < self.height: # Portrait: write sets of rows
wd = self.width // 2 wd = self.width // 2
line = 0 line = 0
spi_write = self._spi.write
length = self._lines_per_write*wd
for _ in range(split): # For each segment for _ in range(split): # For each segment
async with elock: async with elock:
if self._spi_init: # A callback was passed if self._spi_init: # A callback was passed
self._spi_init(self._spi) # Bus may be shared self._spi_init(self._spi) # Bus may be shared
self._cs(0) self._cs(0)
r = range(wd * line, wd * (line + lines), length)
if cm: if cm:
for start in range( lcopy = _lcopy_gs # Copy and greyscale
wd * line, wd * (line + lines), wd for start in r :
): # For each line lcopy(lb, buf[start:], length)
_lcopy_gs(lb, buf[start:], wd) # Copy and greyscale spi_write(lb)
self._spi.write(lb) else :
else: lcopy = _lcopy # Copy and map colors
for start in range( for start in r :
wd * line, wd * (line + lines), wd lcopy(lb, buf[start:], clut, length)
): # For each line spi_write(lb)
_lcopy(lb, buf[start:], clut, wd) # Copy and map colors
self._spi.write(lb)
line += lines line += lines
self._cs(1) # Allow other tasks to use bus self._cs(1) # Allow other tasks to use bus
await asyncio.sleep_ms(0) await asyncio.sleep_ms(0)
else: # Landscape: write sets of cols. lines is no. of cols per segment.
cargs = (self.height << 9) + (self.width << 18) # Viper 4-arg limit
sc = self.width - 1 # Start and end columns
ec = sc - lines # End column
for _ in range(split): # For each segment
async with elock:
if self._spi_init: # A callback was passed
self._spi_init(self._spi) # Bus may be shared
self._cs(0)
if cm:
for col in range(sc, ec, -1): # For each column of landscape display
_lscopy_gs(lb, buf, col + cargs) # Copy and map colors
self._spi.write(lb)
else:
for col in range(sc, ec, -1): # For each column of landscape display
_lscopy(lb, buf, clut, col + cargs) # Copy and map colors
self._spi.write(lb)
sc -= lines
ec -= lines
self._cs(1) # Allow other tasks to use bus
await asyncio.sleep_ms(0)