1152 lines
50 KiB
C++
1152 lines
50 KiB
C++
|
/*-------------------------------------------------------------------------
|
||
|
Arduino library to control a wide variety of WS2811- and WS2812-based RGB
|
||
|
LED devices such as Adafruit FLORA RGB Smart Pixels and NeoPixel strips.
|
||
|
|
||
|
Currently handles 800 KHz bitstreams on 8, 10, 12, 16, and 20 MHz ATtiny
|
||
|
MCUs with ATTinyCore 1.30+, 8, 10, 12, 16, 20, 24, 28, and 32 MHz AVRxt
|
||
|
tinyAVR 0/1/2-series parts with megaTinyCore 1.0.3+ and those speeds
|
||
|
plus the ridiculously overclocked 36, 40, 44, and 48 MHz speeds with
|
||
|
AVR Dx-series parts. Note that the highest speeds have not been tested
|
||
|
and it would be surprising if the parts could be pushed that far.
|
||
|
|
||
|
Like the Adafruit original version, it supports LEDs wired
|
||
|
for various color orders. 400 kHz support was never included.
|
||
|
Nobody has ever asked about it, nor have I seen any 400 kHz LEDs for sale.
|
||
|
Ever.
|
||
|
|
||
|
Written by Phil Burgess / Paint Your Dragon for Adafruit Industries,
|
||
|
contributions by PJRC, Michael Miller and other members of the open
|
||
|
source community.
|
||
|
|
||
|
Extensive porting to additional parts and different clock speeds by
|
||
|
Spence Konde.
|
||
|
|
||
|
Adafruit invests time and resources providing this open source code,
|
||
|
please support Adafruit and open-source hardware by purchasing products
|
||
|
from Adafruit!
|
||
|
|
||
|
Same goes for Spence too!
|
||
|
|
||
|
-------------------------------------------------------------------------
|
||
|
This file is part of the tinyNeoPixel library derived from
|
||
|
Adafruit_NeoPixel.
|
||
|
|
||
|
NeoPixel is free software: you can redistribute it and/or modify
|
||
|
it under the terms of the GNU Lesser General Public License as
|
||
|
published by the Free Software Foundation, either version 3 of
|
||
|
the License, or (at your option) any later version.
|
||
|
|
||
|
NeoPixel is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU Lesser General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Lesser General Public
|
||
|
License along with NeoPixel. If not, see
|
||
|
<http://www.gnu.org/licenses/>.
|
||
|
-------------------------------------------------------------------------*/
|
||
|
|
||
|
#include "tinyNeoPixel_Static.h"
|
||
|
|
||
|
// Constructor when length, pin and type are known at compile-time:
|
||
|
tinyNeoPixel::tinyNeoPixel(uint16_t n, uint8_t p, neoPixelType t, uint8_t *pxl) :
|
||
|
brightness(0), pixels(pxl), endTime(0) {
|
||
|
//boolean oldThreeBytesPerPixel = (wOffset == rOffset); // false if RGBW
|
||
|
wOffset = (t >> 6) & 0b11; // See notes in header file
|
||
|
rOffset = (t >> 4) & 0b11; // regarding R/G/B/W offsets
|
||
|
gOffset = (t >> 2) & 0b11;
|
||
|
bOffset = t & 0b11;
|
||
|
numBytes = n * ((wOffset == rOffset) ? 3 : 4);
|
||
|
numLEDs = n;
|
||
|
pin = p;
|
||
|
port = portOutputRegister(digitalPinToPort(p));
|
||
|
pinMask = digitalPinToBitMask(p);
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
tinyNeoPixel::~tinyNeoPixel() {
|
||
|
//if (pixels) free(pixels);
|
||
|
//if (pin >= 0) pinMode(pin, INPUT);
|
||
|
}
|
||
|
|
||
|
// *INDENT-OFF* astyle don't like assembly
|
||
|
void tinyNeoPixel::show(void) {
|
||
|
|
||
|
if ((!pixels) || pin >= NUM_DIGITAL_PINS) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// Data latch = 50+ microsecond pause in the output stream. Rather than
|
||
|
// put a delay at the end of the function, the ending time is noted and
|
||
|
// the function will simply hold off (if needed) on issuing the
|
||
|
// subsequent round of data until the latch time has elapsed. This
|
||
|
// allows the mainline code to start generating the next frame of data
|
||
|
// rather than stalling for the latch.
|
||
|
while (!canShow());
|
||
|
// endTime is a private member (rather than global var) so that multiple
|
||
|
// instances on different pins can be quickly issued in succession (each
|
||
|
// instance doesn't delay the next).
|
||
|
|
||
|
// In order to make this code runtime-configurable to work with any pin,
|
||
|
// SBI/CBI instructions are eschewed in favor of full PORT writes via the
|
||
|
// OUT or ST instructions. It relies on two facts: that peripheral
|
||
|
// functions (such as PWM) take precedence on output pins, so our PORT-
|
||
|
// wide writes won't interfere, and that interrupts are globally disabled
|
||
|
// while data is being issued to the LEDs, so no other code will be
|
||
|
// accessing the PORT. The code takes an initial 'snapshot' of the PORT
|
||
|
// state, computes 'pin high' and 'pin low' values, and writes these back
|
||
|
// to the PORT register as needed.
|
||
|
|
||
|
noInterrupts(); // Need 100% focus on instruction timing
|
||
|
|
||
|
|
||
|
// AVRxt MCUs -- tinyAVR 0/1/2, megaAVR 0, AVR Dx ----------------------
|
||
|
// with extended maximum speeds to support vigorously overclocked
|
||
|
// Dx-series parts. This is by no means intended to imply that they will
|
||
|
// run at those speeds, only that - if they do - you can control WS2812s
|
||
|
// with them.
|
||
|
|
||
|
volatile uint16_t
|
||
|
i = numBytes; // Loop counter
|
||
|
volatile uint8_t
|
||
|
*ptr = pixels, // Pointer to next byte
|
||
|
b = *ptr++, // Current byte value
|
||
|
hi, // PORT w/output bit set high
|
||
|
lo; // PORT w/output bit set low
|
||
|
|
||
|
// Hand-tuned assembly code issues data to the LED drivers at a specific
|
||
|
// rate. There's separate code for different CPU speeds (8, 12, 16 MHz)
|
||
|
// for both the WS2811 (400 KHz) and WS2812 (800 KHz) drivers. The
|
||
|
// datastream timing for the LED drivers allows a little wiggle room each
|
||
|
// way (listed in the datasheets), so the conditions for compiling each
|
||
|
// case are set up for a range of frequencies rather than just the exact
|
||
|
// 8, 12 or 16 MHz values, permitting use with some close-but-not-spot-on
|
||
|
// devices (e.g. 16.5 MHz DigiSpark). The ranges were arrived at based
|
||
|
// on the datasheet figures and have not been extensively tested outside
|
||
|
// the canonical 8/12/16 MHz speeds; there's no guarantee these will work
|
||
|
// close to the extremes (or possibly they could be pushed further).
|
||
|
// Keep in mind only one CPU speed case actually gets compiled; the
|
||
|
// resulting program isn't as massive as it might look from source here.
|
||
|
|
||
|
// 8 MHz(ish) AVRxt ---------------------------------------------------------
|
||
|
#if (F_CPU >= 7400000UL) && (F_CPU <= 9500000UL)
|
||
|
|
||
|
volatile uint8_t n1, n2 = 0; // First, next bits out
|
||
|
|
||
|
// We need to be able to write to the port register in one clock
|
||
|
// to meet timing constraints here.
|
||
|
|
||
|
// 10 instruction clocks per bit: HHxxxxxLLL
|
||
|
// OUT instructions: ^ ^ ^ (T=0,2,7)
|
||
|
|
||
|
hi = *port | pinMask;
|
||
|
lo = *port & ~pinMask;
|
||
|
n1 = lo;
|
||
|
if (b & 0x80) n1 = hi;
|
||
|
|
||
|
// Dirty trick: RJMPs proceeding to the next instruction are used
|
||
|
// to delay two clock cycles in one instruction word (rather than
|
||
|
// using two NOPs). This was necessary in order to squeeze the
|
||
|
// loop down to exactly 64 words -- the maximum possible for a
|
||
|
// relative branch.
|
||
|
|
||
|
asm volatile(
|
||
|
"headD:" "\n\t" // Clk Pseudocode
|
||
|
// Bit 7:
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi
|
||
|
"mov %[n2] , %[lo]" "\n\t" // 1 n2 = lo
|
||
|
"st %a[port], %[n1]" "\n\t" // 1 PORT = n1
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"sbrc %[byte] , 6" "\n\t" // 1-2 if (b & 0x40)
|
||
|
"mov %[n2] , %[hi]" "\n\t" // 0-1 n2 = hi
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
// Bit 6:
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi
|
||
|
"mov %[n1] , %[lo]" "\n\t" // 1 n1 = lo
|
||
|
"st %a[port], %[n2]" "\n\t" // 1 PORT = n2
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"sbrc %[byte] , 5" "\n\t" // 1-2 if (b & 0x20)
|
||
|
"mov %[n1] , %[hi]" "\n\t" // 0-1 n1 = hi
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
// Bit 5:
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi
|
||
|
"mov %[n2] , %[lo]" "\n\t" // 1 n2 = lo
|
||
|
"st %a[port], %[n1]" "\n\t" // 1 PORT = n1
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"sbrc %[byte] , 4" "\n\t" // 1-2 if (b & 0x10)
|
||
|
"mov %[n2] , %[hi]" "\n\t" // 0-1 n2 = hi
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
// Bit 4:
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi
|
||
|
"mov %[n1] , %[lo]" "\n\t" // 1 n1 = lo
|
||
|
"st %a[port], %[n2]" "\n\t" // 1 PORT = n2
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"sbrc %[byte] , 3" "\n\t" // 1-2 if (b & 0x08)
|
||
|
"mov %[n1] , %[hi]" "\n\t" // 0-1 n1 = hi
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
// Bit 3:
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi
|
||
|
"mov %[n2] , %[lo]" "\n\t" // 1 n2 = lo
|
||
|
"st %a[port], %[n1]" "\n\t" // 1 PORT = n1
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"sbrc %[byte] , 2" "\n\t" // 1-2 if (b & 0x04)
|
||
|
"mov %[n2] , %[hi]" "\n\t" // 0-1 n2 = hi
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
// Bit 2:
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi
|
||
|
"mov %[n1] , %[lo]" "\n\t" // 1 n1 = lo
|
||
|
"st %a[port], %[n2]" "\n\t" // 1 PORT = n2
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"sbrc %[byte] , 1" "\n\t" // 1-2 if (b & 0x02)
|
||
|
"mov %[n1] , %[hi]" "\n\t" // 0-1 n1 = hi
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
// Bit 1:
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi
|
||
|
"mov %[n2] , %[lo]" "\n\t" // 1 n2 = lo
|
||
|
"st %a[port], %[n1]" "\n\t" // 1 PORT = n1
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"sbrc %[byte] , 0" "\n\t" // 1-2 if (b & 0x01)
|
||
|
"mov %[n2] , %[hi]" "\n\t" // 0-1 n2 = hi
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo
|
||
|
"sbiw %[count], 1" "\n\t" // 2 i-- (don't act on Z flag yet)
|
||
|
// Bit 0:
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi
|
||
|
"mov %[n1] , %[lo]" "\n\t" // 1 n1 = lo
|
||
|
"st %a[port], %[n2]" "\n\t" // 1 PORT = n2
|
||
|
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++
|
||
|
"sbrc %[byte] , 7" "\n\t" // 1-2 if (b & 0x80)
|
||
|
"mov %[n1] , %[hi]" "\n\t" // 0-1 n1 = hi
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo
|
||
|
"brne headD" "\n" // 2 while(i) (Z flag set above)
|
||
|
: [port] "+e" (port),
|
||
|
[byte] "+r" (b),
|
||
|
[n1] "+r" (n1),
|
||
|
[n2] "+r" (n2),
|
||
|
[count] "+w" (i)
|
||
|
: [ptr] "e" (ptr),
|
||
|
[hi] "r" (hi),
|
||
|
[lo] "r" (lo));
|
||
|
|
||
|
#elif (F_CPU >= 9500000UL) && (F_CPU <= 11100000UL)
|
||
|
/*
|
||
|
volatile uint8_t n1, n2 = 0; // First, next bits out
|
||
|
|
||
|
*/
|
||
|
// 14 instruction clocks per bit: HHHHxxxxLLLLL
|
||
|
// ST instructions: ^ ^ ^ (T=0,4,7)
|
||
|
volatile uint8_t next;
|
||
|
|
||
|
hi = *port | pinMask;
|
||
|
lo = *port & ~pinMask;
|
||
|
next = lo;
|
||
|
if (b & 0x80) {
|
||
|
next = hi;
|
||
|
}
|
||
|
|
||
|
// Don't "optimize" the OUT calls into the bitTime subroutine;
|
||
|
// we're exploiting the RCALL and RET as 3- and 4-cycle NOPs!
|
||
|
asm volatile(
|
||
|
"headD:" "\n\t" // (T = 0)
|
||
|
"st %a[port], %[hi]" "\n\t" // (T = 1)
|
||
|
"rcall bitTimeD" "\n\t" // Bit 7 (T = 14)
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 6
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 5
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 4
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 3
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 2
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 1
|
||
|
// Bit 0:
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi (T = 1)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 3)
|
||
|
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 5)
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 6)
|
||
|
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 7)
|
||
|
"sbrc %[byte] , 7" "\n\t" // 1-2 if (b & 0x80) (T = 8)
|
||
|
"mov %[next] , %[hi]" "\n\t" // 0-1 next = hi (T = 9)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 10)
|
||
|
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 12)
|
||
|
"brne headD" "\n\t" // 2 if (i != 0) -> (next byte)
|
||
|
"rjmp doneD" "\n\t"
|
||
|
"bitTimeD:" "\n\t" // nop nop nop (T = 4)
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 5)
|
||
|
"mov %[next], %[lo]" "\n\t" // 1 next = lo (T = 6)
|
||
|
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 7)
|
||
|
"sbrc %[byte], 7" "\n\t" // 1-2 if (b & 0x80) (T = 8)
|
||
|
"mov %[next], %[hi]" "\n\t" // 0-1 next = hi (T = 9)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 10)
|
||
|
"ret" "\n\t" // 4 nop nop nop nop (T = 14)
|
||
|
"doneD:" "\n"
|
||
|
: [port] "+e" (port),
|
||
|
[byte] "+r" (b),
|
||
|
[next] "+r" (next),
|
||
|
[count] "+w" (i)
|
||
|
: [ptr] "e" (ptr),
|
||
|
[hi] "r" (hi),
|
||
|
[lo] "r" (lo));
|
||
|
|
||
|
|
||
|
|
||
|
// 12 MHz(ish) AVRxt --------------------------------------------------------
|
||
|
#elif (F_CPU >= 11100000UL) && (F_CPU <= 14300000UL)
|
||
|
|
||
|
// In the 12 MHz case, an optimized 800 KHz datastream (no dead time
|
||
|
// between bytes) requires a PORT-specific loop similar to the 8 MHz
|
||
|
// code (but a little more relaxed in this case).
|
||
|
|
||
|
// 15 instruction clocks per bit: HHHHxxxxxxLLLLL
|
||
|
// OUT instructions: ^ ^ ^ (T=0,4,10)
|
||
|
|
||
|
volatile uint8_t next;
|
||
|
|
||
|
hi = *port | pinMask;
|
||
|
lo = *port & ~pinMask;
|
||
|
next = lo;
|
||
|
if (b & 0x80) {
|
||
|
next = hi;
|
||
|
}
|
||
|
|
||
|
// Don't "optimize" the OUT calls into the bitTime subroutine;
|
||
|
// we're exploiting the RCALL and RET as 3- and 4-cycle NOPs!
|
||
|
asm volatile(
|
||
|
"headD:" "\n\t" // (T = 0)
|
||
|
"st %a[port], %[hi]" "\n\t" // (T = 1)
|
||
|
"rcall bitTimeD" "\n\t" // Bit 7 (T = 15)
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 6
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 5
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 4
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 3
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 2
|
||
|
"st %a[port], %[hi]" "\n\t"
|
||
|
"rcall bitTimeD" "\n\t" // Bit 1
|
||
|
// Bit 0:
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi (T = 1)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 3)
|
||
|
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 5)
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 6)
|
||
|
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 7)
|
||
|
"sbrc %[byte] , 7" "\n\t" // 1-2 if (b & 0x80) (T = 8)
|
||
|
"mov %[next] , %[hi]" "\n\t" // 0-1 next = hi (T = 9)
|
||
|
"nop" "\n\t" // 1 (T = 10)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 11)
|
||
|
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 13)
|
||
|
"brne headD" "\n\t" // 2 if (i != 0) -> (next byte)
|
||
|
"rjmp doneD" "\n\t"
|
||
|
"bitTimeD:" "\n\t" // nop nop nop (T = 4)
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 5)
|
||
|
"mov %[next], %[lo]" "\n\t" // 1 next = lo (T = 6)
|
||
|
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 7)
|
||
|
"sbrc %[byte], 7" "\n\t" // 1-2 if (b & 0x80) (T = 8)
|
||
|
"mov %[next], %[hi]" "\n\t" // 0-1 next = hi (T = 9)
|
||
|
"nop" "\n\t" // 1 (T = 10)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 11)
|
||
|
"ret" "\n\t" // 4 nop nop nop nop (T = 15)
|
||
|
"doneD:" "\n"
|
||
|
: [port] "+e" (port),
|
||
|
[byte] "+r" (b),
|
||
|
[next] "+r" (next),
|
||
|
[count] "+w" (i)
|
||
|
: [ptr] "e" (ptr),
|
||
|
[hi] "r" (hi),
|
||
|
[lo] "r" (lo));
|
||
|
|
||
|
|
||
|
// 16 MHz(ish) AVRxt ------------------------------------------------------
|
||
|
#elif (F_CPU >= 15400000UL) && (F_CPU <= 19000000L)
|
||
|
|
||
|
// WS2811 and WS2812 have different hi/lo duty cycles; this is
|
||
|
// similar but NOT an exact copy of the prior 400-on-8 code.
|
||
|
|
||
|
// 20 inst. clocks per bit: HHHHHxxxxxxxxLLLLLLL
|
||
|
// ST instructions: ^ ^ ^ (T=0,5,13)
|
||
|
|
||
|
volatile uint8_t next, bit;
|
||
|
|
||
|
hi = *port | pinMask;
|
||
|
lo = *port & ~pinMask;
|
||
|
next = lo;
|
||
|
bit = 8;
|
||
|
|
||
|
asm volatile(
|
||
|
"head20:" "\n\t" // Clk Pseudocode (T = 0)
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi (T = 1)
|
||
|
"nop" "\n\t" // 1 nop (T = 2)
|
||
|
"sbrc %[byte], 7" "\n\t" // 1-2 if (b & 128)
|
||
|
"mov %[next], %[hi]" "\n\t" // 0-1 next = hi (T = 4)
|
||
|
"dec %[bit]" "\n\t" // 1 bit-- (T = 5)
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 6)
|
||
|
"nop" "\n\t" // 1 nop (T = 7)
|
||
|
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 8)
|
||
|
"breq nextbyte20" "\n\t" // 1-2 if (bit == 0) (from dec above)
|
||
|
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 10)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 12)
|
||
|
"nop" "\n\t" // 1 nop (T = 13)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 14)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 16)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 18)
|
||
|
"rjmp head20" "\n\t" // 2 -> head20 (next bit out) (T=20)
|
||
|
"nextbyte20:" "\n\t" // (T = 10)
|
||
|
"ldi %[bit] , 8" "\n\t" // 1 bit = 8 (T = 11)
|
||
|
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 13)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 14)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 16)
|
||
|
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 18)
|
||
|
"brne head20" "\n" // 2 if (i != 0) -> (next byte) (T=20)
|
||
|
: [port] "+e" (port),
|
||
|
[byte] "+r" (b),
|
||
|
[bit] "+r" (bit),
|
||
|
[next] "+r" (next),
|
||
|
[count] "+w" (i)
|
||
|
: [ptr] "e" (ptr),
|
||
|
[hi] "r" (hi),
|
||
|
[lo] "r" (lo));
|
||
|
|
||
|
// 20 MHz(ish) AVRxt ------------------------------------------------------
|
||
|
#elif (F_CPU >= 19000000UL) && (F_CPU <= 22000000L)
|
||
|
|
||
|
|
||
|
// 25 inst. clocks per bit: HHHHHHHxxxxxxxxLLLLLLLLLL
|
||
|
// ST instructions: ^ ^ ^ (T=0,7,15)
|
||
|
|
||
|
volatile uint8_t next, bit;
|
||
|
|
||
|
hi = *port | pinMask;
|
||
|
lo = *port & ~pinMask;
|
||
|
next = lo;
|
||
|
bit = 8;
|
||
|
|
||
|
asm volatile(
|
||
|
"head20:" "\n\t" // Clk Pseudocode (T = 0)
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi (T = 1)
|
||
|
"sbrc %[byte], 7" "\n\t" // 1-2 if (b & 128)
|
||
|
"mov %[next], %[hi]" "\n\t" // 0-1 next = hi (T = 3)
|
||
|
"dec %[bit]" "\n\t" // 1 bit-- (T = 4)
|
||
|
"nop" "\n\t" // 1 nop (T = 5)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 7)
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 8)
|
||
|
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 9)
|
||
|
"breq nextbyte20" "\n\t" // 1-2 if (bit == 0) (from dec above)
|
||
|
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 11)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 13)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 15)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 16)
|
||
|
"nop" "\n\t" // 1 nop (T = 17)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 19)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 21)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 23)
|
||
|
"rjmp head20" "\n\t" // 2 -> head20 (next bit out)
|
||
|
"nextbyte20:" "\n\t" // (T = 11)
|
||
|
"ldi %[bit] , 8" "\n\t" // 1 bit = 8 (T = 12)
|
||
|
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 14)
|
||
|
"nop" "\n\t" // 1 nop (T = 15)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 16)
|
||
|
"nop" "\n\t" // 1 nop (T = 17)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 19)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 21)
|
||
|
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 23)
|
||
|
"brne head20" "\n" // 2 if (i != 0) -> (next byte) ()
|
||
|
: [port] "+e" (port),
|
||
|
[byte] "+r" (b),
|
||
|
[bit] "+r" (bit),
|
||
|
[next] "+r" (next),
|
||
|
[count] "+w" (i)
|
||
|
: [ptr] "e" (ptr),
|
||
|
[hi] "r" (hi),
|
||
|
[lo] "r" (lo));
|
||
|
|
||
|
// 24 (22~26) MHz AVRxt ------------------------------------------------------
|
||
|
#elif (F_CPU >= 22000000UL) && (F_CPU <= 26000000L)
|
||
|
|
||
|
|
||
|
// 30 inst. clocks per bit: HHHHHHHxxxxxxxxLLLLLLLLLL
|
||
|
// ST instructions: ^ ^ ^ (T=0,9,18)
|
||
|
|
||
|
volatile uint8_t next, bit;
|
||
|
|
||
|
hi = *port | pinMask;
|
||
|
lo = *port & ~pinMask;
|
||
|
next = lo;
|
||
|
bit = 8;
|
||
|
|
||
|
|
||
|
asm volatile(
|
||
|
"head24:" "\n\t" // Clk Pseudocode (T = 0)
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi (T = 1)
|
||
|
"sbrc %[byte], 7" "\n\t" // 1-2 if (b & 128)
|
||
|
"mov %[next], %[hi]" "\n\t" // 0-1 next = hi (T = 3)
|
||
|
"dec %[bit]" "\n\t" // 1 bit-- (T = 4)
|
||
|
"nop" "\n\t" // 1 nop (T = 5)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 7)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 9)
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 10)
|
||
|
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 11)
|
||
|
"nop" "\n\t" // 1 nop (T = 12)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 14)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 16)
|
||
|
"breq nextbyte24" "\n\t" // 1-2 if (bit == 0) (from dec above)
|
||
|
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 18)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 19)
|
||
|
"rcall seconddelay24" "\n\t" // 2+4+3=9 (T = 28)
|
||
|
"rjmp head24" "\n\t" // 2 -> head20 (next bit out)
|
||
|
"seconddelay24:" "\n\t" //
|
||
|
"nop" "\n\t" // 1
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"ret" "\n\t" // 4
|
||
|
"nextbyte24:" "\n\t" // last bit of a byte (T = 18)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 19)
|
||
|
"ldi %[bit] , 8" "\n\t" // 1 bit = 8 (T = 20)
|
||
|
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 22)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 24)
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop (T = 26)
|
||
|
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 28)
|
||
|
"brne head24" "\n" // 2 if (i != 0) -> (next byte) ()
|
||
|
: [port] "+e" (port),
|
||
|
[byte] "+r" (b),
|
||
|
[bit] "+r" (bit),
|
||
|
[next] "+r" (next),
|
||
|
[count] "+w" (i)
|
||
|
: [ptr] "e" (ptr),
|
||
|
[hi] "r" (hi),
|
||
|
[lo] "r" (lo));
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
// 28 (26~30) MHz AVRxt ------------------------------------------------------
|
||
|
#elif (F_CPU >= 26000000UL) && (F_CPU <= 30000000L)
|
||
|
|
||
|
|
||
|
// 35 inst. clocks per bit: HHHHHHHxxxxxxxxLLLLLLLLLL
|
||
|
// ST instructions: ^ ^ ^ (T=0,10,21)
|
||
|
|
||
|
volatile uint8_t next, bit;
|
||
|
|
||
|
hi = *port | pinMask;
|
||
|
lo = *port & ~pinMask;
|
||
|
next = lo;
|
||
|
bit = 8;
|
||
|
|
||
|
asm volatile(
|
||
|
"head28:" "\n\t" // Clk Pseudocode (T = 0)
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi (T = 1)
|
||
|
"sbrc %[byte], 7" "\n\t" // 1-2 if (b & 128)
|
||
|
"mov %[next], %[hi]" "\n\t" // 0-1 next = hi (T = 3)
|
||
|
"dec %[bit]" "\n\t" // 1 bit-- (T = 4)
|
||
|
"rcall zerothdelay32" "\n\t" // 2+4=6
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 11)
|
||
|
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 12)
|
||
|
"rcall firstdelay28" "\n\t" // 2+4 = 7 (T = 19)
|
||
|
"breq nextbyte28" "\n\t" // 1-2 if (bit == 0) (from dec above)
|
||
|
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 21)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 22)
|
||
|
"rcall seconddelay28" "\n\t" // 2+4+1+4=11 (T = 33)
|
||
|
"rjmp head28" "\n\t" // 2 -> head20 (next bit out)
|
||
|
"seconddelay28:" "\n\t" //
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"firstdelay28:" "\n\t" // first delay
|
||
|
"nop" "\n\t" // 1 nop
|
||
|
"thirddelay28:" "\n\t" // third delay
|
||
|
"zerothdelay28:" "\n\t"
|
||
|
"ret" "\n\t" // 4
|
||
|
"nextbyte28:" "\n\t" // last bit of a byte (T = 21)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 22)
|
||
|
"ldi %[bit] , 8" "\n\t" // 1 bit = 8 (T = 23)
|
||
|
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 25)
|
||
|
"rcall thirddelay28" "\n\t" // 2+4 = 6 (T = 31)
|
||
|
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 33)
|
||
|
"brne head28" "\n" // 2 if (i != 0) -> (next byte) ()
|
||
|
: [port] "+e" (port),
|
||
|
[byte] "+r" (b),
|
||
|
[bit] "+r" (bit),
|
||
|
[next] "+r" (next),
|
||
|
[count] "+w" (i)
|
||
|
: [ptr] "e" (ptr),
|
||
|
[hi] "r" (hi),
|
||
|
[lo] "r" (lo));
|
||
|
|
||
|
|
||
|
// 32 (30~34) MHz AVRxt ------------------------------------------------------
|
||
|
#elif (F_CPU > 30000000UL) && (F_CPU <= 34000000L)
|
||
|
|
||
|
|
||
|
// 40 inst. clocks per bit: HHHHHHHxxxxxxxxLLLLLLLLLL
|
||
|
// ST instructions: ^ ^ ^ (T=0,11,24)
|
||
|
|
||
|
volatile uint8_t next, bit;
|
||
|
|
||
|
hi = *port | pinMask;
|
||
|
lo = *port & ~pinMask;
|
||
|
next = lo;
|
||
|
bit = 8;
|
||
|
|
||
|
asm volatile(
|
||
|
"head32:" "\n\t" // Clk Pseudocode (T = 0)
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi (T = 1)
|
||
|
"sbrc %[byte], 7" "\n\t" // 1-2 if (b & 128)
|
||
|
"mov %[next], %[hi]" "\n\t" // 0-1 next = hi (T = 3)
|
||
|
"dec %[bit]" "\n\t" // 1 bit-- (T = 4)
|
||
|
"rcall zerothdelay32" "\n\t" // 2+4+1=7
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 12)
|
||
|
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 13)
|
||
|
"rcall firstdelay32" "\n\t" // 2+4+1+2 = 9 (T = 22)
|
||
|
"breq nextbyte32" "\n\t" // 1-2 if (bit == 0) (from dec above)
|
||
|
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 24)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 25)
|
||
|
"rcall seconddelay32" "\n\t" // 2+4+3+2+3=13 (T = 38)
|
||
|
"rjmp head32" "\n\t" // 2 -> head20 (next bit out)
|
||
|
"seconddelay32:" "\n\t" // second delay 13 cycles
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"firstdelay32:" "\n\t" // first delay 9 cycles
|
||
|
"nop" "\n\t" // 1 nop
|
||
|
"thirddelay32:" "\n\t" // third delay 8 cycles
|
||
|
"nop" "\n\t" // 1 nop
|
||
|
"zerothdelay32:" "\n\t" // zeroth delay 7 cycles
|
||
|
"nop" "\n\t" // 1 nop
|
||
|
"ret" "\n\t" // 4
|
||
|
"nextbyte32:" "\n\t" // last bit of a byte (T = 24)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 25)
|
||
|
"ldi %[bit] , 8" "\n\t" // 1 bit = 8 (T = 26)
|
||
|
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 28)
|
||
|
"rcall thirddelay32" "\n\t" // 2+4+1+1 = 8 (T = 36)
|
||
|
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 38)
|
||
|
"brne head32" "\n" // 2 if (i != 0) -> (next byte) ()
|
||
|
: [port] "+e" (port),
|
||
|
[byte] "+r" (b),
|
||
|
[bit] "+r" (bit),
|
||
|
[next] "+r" (next),
|
||
|
[count] "+w" (i)
|
||
|
: [ptr] "e" (ptr),
|
||
|
[hi] "r" (hi),
|
||
|
[lo] "r" (lo));
|
||
|
|
||
|
// 36 (34~38) MHz AVRxt ------------------------------------------------------
|
||
|
#elif (F_CPU > 3400000UL) && (F_CPU <= 38000000L)
|
||
|
|
||
|
|
||
|
// 45 inst. clocks per bit: HHHHHHHxxxxxxxxLLLLLLLLLL
|
||
|
// ST instructions: ^ ^ ^ (T=0,12,27)
|
||
|
|
||
|
volatile uint8_t next, bit;
|
||
|
|
||
|
hi = *port | pinMask;
|
||
|
lo = *port & ~pinMask;
|
||
|
next = lo;
|
||
|
bit = 8;
|
||
|
|
||
|
asm volatile(
|
||
|
"head36:" "\n\t" // Clk Pseudocode (T = 0)
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi (T = 1)
|
||
|
"sbrc %[byte], 7" "\n\t" // 1-2 if (b & 128)
|
||
|
"mov %[next], %[hi]" "\n\t" // 0-1 next = hi (T = 3)
|
||
|
"dec %[bit]" "\n\t" // 1 bit-- (T = 4)
|
||
|
"rcall zerothdelay36" "\n\t" // 2+4+2=8
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 13)
|
||
|
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 14)
|
||
|
"rcall firstdelay36" "\n\t" // 2+4+3 = 11 (T = 25)
|
||
|
"breq nextbyte36" "\n\t" // 1-2 if (bit == 0) (from dec above)
|
||
|
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 27)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 28)
|
||
|
"rcall seconddelay36" "\n\t" // 2+4+3+2+2=15 (T = 43)
|
||
|
"rjmp head36" "\n\t" // 2 -> head20 (next bit out)
|
||
|
"seconddelay36:" "\n\t" // second delay 15 cycles
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"firstdelay36:" "\n\t" // first delay 11 cycles
|
||
|
"nop" "\n\t" // 1 nop
|
||
|
"thirddelay36:" "\n\t" // third delay 10 cycles
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"zerothdelay36:" "\n\t" // zeroth delay 8 cycles
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"ret" "\n\t" // 4
|
||
|
"nextbyte36:" "\n\t" // last bit of a byte (T = 27)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 28)
|
||
|
"ldi %[bit] , 8" "\n\t" // 1 bit = 8 (T = 29)
|
||
|
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 31)
|
||
|
"rcall thirddelay36" "\n\t" // 2+4 = 10 (T = 41)
|
||
|
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 43)
|
||
|
"brne head36" "\n" // 2 if (i != 0) -> (next byte) ()
|
||
|
: [port] "+e" (port),
|
||
|
[byte] "+r" (b),
|
||
|
[bit] "+r" (bit),
|
||
|
[next] "+r" (next),
|
||
|
[count] "+w" (i)
|
||
|
: [ptr] "e" (ptr),
|
||
|
[hi] "r" (hi),
|
||
|
[lo] "r" (lo));
|
||
|
|
||
|
|
||
|
// 40 (38-44) MHz AVRxt ------------------------------------------------------
|
||
|
#elif (F_CPU > 3800000UL) && (F_CPU <= 44000000L)
|
||
|
|
||
|
|
||
|
// 50 inst. clocks per bit: HHHHHHHxxxxxxxxLLLLLLLLLL
|
||
|
// ST instructions: ^ ^ ^ (T=0,14,30)
|
||
|
|
||
|
volatile uint8_t next, bit;
|
||
|
|
||
|
hi = *port | pinMask;
|
||
|
lo = *port & ~pinMask;
|
||
|
next = lo;
|
||
|
bit = 8;
|
||
|
|
||
|
asm volatile(
|
||
|
"head40:" "\n\t" // Clk Pseudocode (T = 0)
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi (T = 1)
|
||
|
"sbrc %[byte], 7" "\n\t" // 1-2 if (b & 128)
|
||
|
"mov %[next], %[hi]" "\n\t" // 0-1 next = hi (T = 3)
|
||
|
"dec %[bit]" "\n\t" // 1 bit-- (T = 4)
|
||
|
"rcall zerothdelay40" "\n\t" // 2+4+4=10
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 15)
|
||
|
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 16)
|
||
|
"rcall firstdelay40" "\n\t" // 2+4+4+2 = 12 (T = 28)
|
||
|
"breq nextbyte40" "\n\t" // 1-2 if (bit == 0) (from dec above)
|
||
|
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 30)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 31)
|
||
|
"rcall seconddelay40" "\n\t" // 2+4+3+2+3=17 (T = 48)
|
||
|
"rjmp head40" "\n\t" // 2 -> head20 (next bit out)
|
||
|
"seconddelay40:" "\n\t" // second delay 17 cycles
|
||
|
"nop" "\n\t" // 1 nop
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"thirddelay40:" "\n\t" // third delay 12 cycles
|
||
|
"firstdelay40:" "\n\t" // first delay 12 cycles
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"zerothdelay40:" "\n\t" // zeroth delay 10 cycles
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"ret" "\n\t" // 4
|
||
|
"nextbyte40:" "\n\t" // last bit of a byte (T = 30)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 31)
|
||
|
"ldi %[bit] , 8" "\n\t" // 1 bit = 8 (T = 32)
|
||
|
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 34)
|
||
|
"rcall thirddelay40" "\n\t" // 2+4+4+2 = 12 (T = 46)
|
||
|
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 48)
|
||
|
"brne head40" "\n" // 2 if (i != 0) -> (next byte) ()
|
||
|
: [port] "+e" (port),
|
||
|
[byte] "+r" (b),
|
||
|
[bit] "+r" (bit),
|
||
|
[next] "+r" (next),
|
||
|
[count] "+w" (i)
|
||
|
: [ptr] "e" (ptr),
|
||
|
[hi] "r" (hi),
|
||
|
[lo] "r" (lo));
|
||
|
|
||
|
// 48 (44-50) MHz AVRxt ------------------------------------------------------
|
||
|
#elif (F_CPU > 4400000UL) && (F_CPU <= 50000000L)
|
||
|
|
||
|
|
||
|
// 60 inst. clocks per bit: HHHHHHHxxxxxxxxLLLLLLLLLL
|
||
|
// ST instructions: ^ ^ ^ (T=0,16,35)
|
||
|
|
||
|
volatile uint8_t next, bit;
|
||
|
|
||
|
hi = *port | pinMask;
|
||
|
lo = *port & ~pinMask;
|
||
|
next = lo;
|
||
|
bit = 8;
|
||
|
asm volatile(
|
||
|
"head48:" "\n\t" // Clk Pseudocode (T = 0)
|
||
|
"st %a[port], %[hi]" "\n\t" // 1 PORT = hi (T = 1)
|
||
|
"sbrc %[byte], 7" "\n\t" // 1-2 if (b & 128)
|
||
|
"mov %[next], %[hi]" "\n\t" // 0-1 next = hi (T = 3)
|
||
|
"dec %[bit]" "\n\t" // 1 bit-- (T = 4)
|
||
|
"rcall zerothdelay48" "\n\t" // 2+4=13
|
||
|
"st %a[port], %[next]" "\n\t" // 1 PORT = next (T = 17)
|
||
|
"mov %[next] , %[lo]" "\n\t" // 1 next = lo (T = 18)
|
||
|
"rcall firstdelay48" "\n\t" // 2+4+3 = 15 (T = 33)
|
||
|
"breq nextbyte48" "\n\t" // 1-2 if (bit == 0) (from dec above)
|
||
|
"rol %[byte]" "\n\t" // 1 b <<= 1 (T = 35)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 36)
|
||
|
"rcall seconddelay48" "\n\t" // 2+4+3+2+3=22 (T = 58)
|
||
|
"rjmp head48" "\n\t" // 2 -> head20 (next bit out)
|
||
|
"seconddelay48:" "\n\t" // second delay 22 cycles
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"nop" "\n\t" // 1 nop
|
||
|
"thirddelay48:" "\n\t" // third delay 17 cycles
|
||
|
"rjmp .+0" "\n\t" // 2
|
||
|
"firstdelay48:" "\n\t" // first delay 15 cycles
|
||
|
"rjmp .+0" "\n\t" // 2 nop nop
|
||
|
"zerothdelay48:" "\n\t" // zeroth delay 13 cycles
|
||
|
"nop" "\n\t" // 1 nop
|
||
|
"rcall emptydelay48" "\n\t" // 2+4
|
||
|
"ret" "\n\t" // 4
|
||
|
"emptydelay48:" "\n\t" // immediately returns: 2+4 = 6 cycles, for 2 words!
|
||
|
"ret" "\n\t" // 4
|
||
|
"nextbyte48:" "\n\t" // last bit of a byte (T = 35)
|
||
|
"st %a[port], %[lo]" "\n\t" // 1 PORT = lo (T = 36)
|
||
|
"ldi %[bit] , 8" "\n\t" // 1 bit = 8 (T = 37)
|
||
|
"ld %[byte] , %a[ptr]+" "\n\t" // 2 b = *ptr++ (T = 39)
|
||
|
"rcall thirddelay48" "\n\t" // 2+4 = 17 (T = 56)
|
||
|
"sbiw %[count], 1" "\n\t" // 2 i-- (T = 58)
|
||
|
"brne head48" "\n" // 2 if (i != 0) -> (next byte) ()
|
||
|
: [port] "+e" (port),
|
||
|
[byte] "+r" (b),
|
||
|
[bit] "+r" (bit),
|
||
|
[next] "+r" (next),
|
||
|
[count] "+w" (i)
|
||
|
: [ptr] "e" (ptr),
|
||
|
[hi] "r" (hi),
|
||
|
[lo] "r" (lo));
|
||
|
|
||
|
#else
|
||
|
#error "CPU SPEED NOT SUPPORTED"
|
||
|
#endif
|
||
|
|
||
|
// END AVR ----------------------------------------------------------------
|
||
|
|
||
|
interrupts();
|
||
|
#if (!defined(DISABLEMILLIS) && !defined(MILLIS_USE_TIMERRTC) && !defined(MILLIS_USE_TIMERRTC_XTAL) && !defined(MILLIS_USE_TIMERRTC_XOSC))
|
||
|
endTime = micros();
|
||
|
// Save EOD time for latch on next call
|
||
|
#else
|
||
|
#warning "micros is not available based on timer settings. You must ensure at least 50us between calls to show() or the pixels will never latch"
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
// Set the output pin number
|
||
|
void tinyNeoPixel::setPin(uint8_t p) {
|
||
|
pin = p;
|
||
|
port = portOutputRegister(digitalPinToPort(p));
|
||
|
pinMask = digitalPinToBitMask(p);
|
||
|
}
|
||
|
|
||
|
// Set pixel color from separate R,G,B components:
|
||
|
void tinyNeoPixel::setPixelColor(uint16_t n, uint8_t r, uint8_t g, uint8_t b) {
|
||
|
if (n < numLEDs) {
|
||
|
if (brightness) { // See notes in setBrightness()
|
||
|
r = (r * brightness) >> 8;
|
||
|
g = (g * brightness) >> 8;
|
||
|
b = (b * brightness) >> 8;
|
||
|
}
|
||
|
uint8_t *p;
|
||
|
if (wOffset == rOffset) { // Is an RGB-type strip
|
||
|
p = &pixels[n * 3]; // 3 bytes per pixel
|
||
|
} else { // Is a WRGB-type strip
|
||
|
p = &pixels[n * 4]; // 4 bytes per pixel
|
||
|
p[wOffset] = 0; // But only R,G,B passed -- set W to 0
|
||
|
}
|
||
|
p[rOffset] = r; // R,G,B always stored
|
||
|
p[gOffset] = g;
|
||
|
p[bOffset] = b;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void tinyNeoPixel::setPixelColor(uint16_t n, uint8_t r, uint8_t g, uint8_t b, uint8_t w) {
|
||
|
if (n < numLEDs) {
|
||
|
if (brightness) { // See notes in setBrightness()
|
||
|
r = (r * brightness) >> 8;
|
||
|
g = (g * brightness) >> 8;
|
||
|
b = (b * brightness) >> 8;
|
||
|
w = (w * brightness) >> 8;
|
||
|
}
|
||
|
uint8_t *p;
|
||
|
if (wOffset == rOffset) { // Is an RGB-type strip
|
||
|
p = &pixels[n * 3]; // 3 bytes per pixel (ignore W)
|
||
|
} else { // Is a WRGB-type strip
|
||
|
p = &pixels[n * 4]; // 4 bytes per pixel
|
||
|
p[wOffset] = w; // Store W
|
||
|
}
|
||
|
p[rOffset] = r; // Store R,G,B
|
||
|
p[gOffset] = g;
|
||
|
p[bOffset] = b;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Set pixel color from 'packed' 32-bit RGB color:
|
||
|
void tinyNeoPixel::setPixelColor(uint16_t n, uint32_t c) {
|
||
|
if (n < numLEDs) {
|
||
|
uint8_t *p,
|
||
|
r = (uint8_t)(c >> 16),
|
||
|
g = (uint8_t)(c >> 8),
|
||
|
b = (uint8_t)c;
|
||
|
if (brightness) { // See notes in setBrightness()
|
||
|
r = (r * brightness) >> 8;
|
||
|
g = (g * brightness) >> 8;
|
||
|
b = (b * brightness) >> 8;
|
||
|
}
|
||
|
if (wOffset == rOffset) {
|
||
|
p = &pixels[n * 3];
|
||
|
} else {
|
||
|
p = &pixels[n * 4];
|
||
|
uint8_t w = (uint8_t)(c >> 24);
|
||
|
p[wOffset] = brightness ? ((w * brightness) >> 8) : w;
|
||
|
}
|
||
|
p[rOffset] = r;
|
||
|
p[gOffset] = g;
|
||
|
p[bOffset] = b;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*!
|
||
|
@brief Fill all or part of the NeoPixel strip with a color.
|
||
|
@param c 32-bit color value. Most significant byte is white (for
|
||
|
RGBW pixels) or ignored (for RGB pixels), next is red,
|
||
|
then green, and least significant byte is blue. If all
|
||
|
arguments are unspecified, this will be 0 (off).
|
||
|
@param first Index of first pixel to fill, starting from 0. Must be
|
||
|
in-bounds, no clipping is performed. 0 if unspecified.
|
||
|
@param count Number of pixels to fill, as a positive value. Passing
|
||
|
0 or leaving unspecified will fill to end of strip.
|
||
|
*/
|
||
|
void tinyNeoPixel::fill(uint32_t c, uint16_t first, uint16_t count) {
|
||
|
uint16_t i, end;
|
||
|
|
||
|
if (first >= numLEDs) {
|
||
|
return; // If first LED is past end of strip, nothing to do
|
||
|
}
|
||
|
|
||
|
// Calculate the index ONE AFTER the last pixel to fill
|
||
|
if (count == 0) {
|
||
|
// Fill to end of strip
|
||
|
end = numLEDs;
|
||
|
} else {
|
||
|
// Ensure that the loop won't go past the last pixel
|
||
|
end = first + count;
|
||
|
if (end > numLEDs) end = numLEDs;
|
||
|
}
|
||
|
|
||
|
for (i = first; i < end; i++) {
|
||
|
this->setPixelColor(i, c);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
@brief Convert hue, saturation and value into a packed 32-bit RGB color
|
||
|
that can be passed to setPixelColor() or other RGB-compatible
|
||
|
functions.
|
||
|
@param hue An unsigned 16-bit value, 0 to 65535, representing one full
|
||
|
loop of the color wheel, which allows 16-bit hues to "roll
|
||
|
over" while still doing the expected thing (and allowing
|
||
|
more precision than the wheel() function that was common to
|
||
|
prior NeoPixel examples).
|
||
|
@param sat Saturation, 8-bit value, 0 (min or pure grayscale) to 255
|
||
|
(max or pure hue). Default of 255 if unspecified.
|
||
|
@param val Value (brightness), 8-bit value, 0 (min / black / off) to
|
||
|
255 (max or full brightness). Default of 255 if unspecified.
|
||
|
@return Packed 32-bit RGB with the most significant byte set to 0 -- the
|
||
|
white element of WRGB pixels is NOT utilized. Result is linearly
|
||
|
but not perceptually correct, so you may want to pass the result
|
||
|
through the gamma32() function (or your own gamma-correction
|
||
|
operation) else colors may appear washed out. This is not done
|
||
|
automatically by this function because coders may desire a more
|
||
|
refined gamma-correction function than the simplified
|
||
|
one-size-fits-all operation of gamma32(). Diffusing the LEDs also
|
||
|
really seems to help when using low-saturation colors.
|
||
|
*/
|
||
|
uint32_t tinyNeoPixel::ColorHSV(uint16_t hue, uint8_t sat, uint8_t val) {
|
||
|
|
||
|
uint8_t r, g, b;
|
||
|
|
||
|
// Remap 0-65535 to 0-1529. Pure red is CENTERED on the 64K rollover;
|
||
|
// 0 is not the start of pure red, but the midpoint...a few values above
|
||
|
// zero and a few below 65536 all yield pure red (similarly, 32768 is the
|
||
|
// midpoint, not start, of pure cyan). The 8-bit RGB hexcone (256 values
|
||
|
// each for red, green, blue) really only allows for 1530 distinct hues
|
||
|
// (not 1536, more on that below), but the full unsigned 16-bit type was
|
||
|
// chosen for hue so that one's code can easily handle a contiguous color
|
||
|
// wheel by allowing hue to roll over in either direction.
|
||
|
hue = (hue * 1530L + 32768) / 65536;
|
||
|
// Because red is centered on the rollover point (the +32768 above,
|
||
|
// essentially a fixed-point +0.5), the above actually yields 0 to 1530,
|
||
|
// where 0 and 1530 would yield the same thing. Rather than apply a
|
||
|
// costly modulo operator, 1530 is handled as a special case below.
|
||
|
|
||
|
// So you'd think that the color "hexcone" (the thing that ramps from
|
||
|
// pure red, to pure yellow, to pure green and so forth back to red,
|
||
|
// yielding six slices), and with each color component having 256
|
||
|
// possible values (0-255), might have 1536 possible items (6*256),
|
||
|
// but in reality there's 1530. This is because the last element in
|
||
|
// each 256-element slice is equal to the first element of the next
|
||
|
// slice, and keeping those in there this would create small
|
||
|
// discontinuities in the color wheel. So the last element of each
|
||
|
// slice is dropped...we regard only elements 0-254, with item 255
|
||
|
// being picked up as element 0 of the next slice. Like this:
|
||
|
// Red to not-quite-pure-yellow is: 255, 0, 0 to 255, 254, 0
|
||
|
// Pure yellow to not-quite-pure-green is: 255, 255, 0 to 1, 255, 0
|
||
|
// Pure green to not-quite-pure-cyan is: 0, 255, 0 to 0, 255, 254
|
||
|
// and so forth. Hence, 1530 distinct hues (0 to 1529), and hence why
|
||
|
// the constants below are not the multiples of 256 you might expect.
|
||
|
|
||
|
// Convert hue to R,G,B (nested ifs faster than divide+mod+switch):
|
||
|
if (hue < 510) { // Red to Green-1
|
||
|
b = 0;
|
||
|
if (hue < 255) { // Red to Yellow-1
|
||
|
r = 255;
|
||
|
g = hue; // g = 0 to 254
|
||
|
} else { // Yellow to Green-1
|
||
|
r = 510 - hue; // r = 255 to 1
|
||
|
g = 255;
|
||
|
}
|
||
|
} else if (hue < 1020) { // Green to Blue-1
|
||
|
r = 0;
|
||
|
if (hue < 765) { // Green to Cyan-1
|
||
|
g = 255;
|
||
|
b = hue - 510; // b = 0 to 254
|
||
|
} else { // Cyan to Blue-1
|
||
|
g = 1020 - hue; // g = 255 to 1
|
||
|
b = 255;
|
||
|
}
|
||
|
} else if (hue < 1530) { // Blue to Red-1
|
||
|
g = 0;
|
||
|
if (hue < 1275) { // Blue to Magenta-1
|
||
|
r = hue - 1020; // r = 0 to 254
|
||
|
b = 255;
|
||
|
} else { // Magenta to Red-1
|
||
|
r = 255;
|
||
|
b = 1530 - hue; // b = 255 to 1
|
||
|
}
|
||
|
} else { // Last 0.5 Red (quicker than % operator)
|
||
|
r = 255;
|
||
|
g = b = 0;
|
||
|
}
|
||
|
|
||
|
// Apply saturation and value to R,G,B, pack into 32-bit result:
|
||
|
uint32_t v1 = 1 + val; // 1 to 256; allows >>8 instead of /255
|
||
|
uint16_t s1 = 1 + sat; // 1 to 256; same reason
|
||
|
uint8_t s2 = 255 - sat; // 255 to 0
|
||
|
return ((((((r * s1) >> 8) + s2) * v1) & 0xff00) << 8) |
|
||
|
(((((g * s1) >> 8) + s2) * v1) & 0xff00) |
|
||
|
(((((b * s1) >> 8) + s2) * v1) >> 8);
|
||
|
}
|
||
|
|
||
|
|
||
|
// Query color from previously-set pixel (returns packed 32-bit RGB value)
|
||
|
uint32_t tinyNeoPixel::getPixelColor(uint16_t n) const {
|
||
|
if (n >= numLEDs) {
|
||
|
return 0; // Out of bounds, return no color.
|
||
|
}
|
||
|
|
||
|
uint8_t *p;
|
||
|
|
||
|
if (wOffset == rOffset) { // Is RGB-type device
|
||
|
p = &pixels[n * 3];
|
||
|
if (brightness) {
|
||
|
// Stored color was decimated by setBrightness(). Returned value
|
||
|
// attempts to scale back to an approximation of the original 24-bit
|
||
|
// value used when setting the pixel color, but there will always be
|
||
|
// some error -- those bits are simply gone. Issue is most
|
||
|
// pronounced at low brightness levels.
|
||
|
return (((uint32_t)(p[rOffset] << 8) / brightness) << 16) |
|
||
|
(((uint32_t)(p[gOffset] << 8) / brightness) << 8) |
|
||
|
((uint32_t)(p[bOffset] << 8) / brightness);
|
||
|
} else {
|
||
|
// No brightness adjustment has been made -- return 'raw' color
|
||
|
return ((uint32_t)p[rOffset] << 16) |
|
||
|
((uint32_t)p[gOffset] << 8) |
|
||
|
(uint32_t)p[bOffset];
|
||
|
}
|
||
|
} else { // Is RGBW-type device
|
||
|
p = &pixels[n * 4];
|
||
|
if (brightness) { // Return scaled color
|
||
|
return (((uint32_t)(p[wOffset] << 8) / brightness) << 24) |
|
||
|
(((uint32_t)(p[rOffset] << 8) / brightness) << 16) |
|
||
|
(((uint32_t)(p[gOffset] << 8) / brightness) << 8) |
|
||
|
((uint32_t)(p[bOffset] << 8) / brightness);
|
||
|
} else { // Return raw color
|
||
|
return ((uint32_t)p[wOffset] << 24) |
|
||
|
((uint32_t)p[rOffset] << 16) |
|
||
|
((uint32_t)p[gOffset] << 8) |
|
||
|
(uint32_t)p[bOffset];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Returns pointer to pixels[] array. Pixel data is stored in device-
|
||
|
// native format and is not translated here. Application will need to be
|
||
|
// aware of specific pixel data format and handle colors appropriately.
|
||
|
uint8_t *tinyNeoPixel::getPixels(void) const {
|
||
|
return pixels;
|
||
|
}
|
||
|
|
||
|
uint16_t tinyNeoPixel::numPixels(void) const {
|
||
|
return numLEDs;
|
||
|
}
|
||
|
|
||
|
// Adjust output brightness; 0=darkest (off), 255=brightest. This does
|
||
|
// NOT immediately affect what's currently displayed on the LEDs. The
|
||
|
// next call to show() will refresh the LEDs at this level. However,
|
||
|
// this process is potentially "lossy," especially when increasing
|
||
|
// brightness. The tight timing in the WS2811/WS2812 code means there
|
||
|
// aren't enough free cycles to perform this scaling on the fly as data
|
||
|
// is issued. So we make a pass through the existing color data in RAM
|
||
|
// and scale it (subsequent graphics commands also work at this
|
||
|
// brightness level). If there's a significant step up in brightness,
|
||
|
// the limited number of steps (quantization) in the old data will be
|
||
|
// quite visible in the re-scaled version. For a non-destructive
|
||
|
// change, you'll need to re-render the full strip data.
|
||
|
void tinyNeoPixel::setBrightness(uint8_t b) {
|
||
|
// Stored brightness value is different than what's passed.
|
||
|
// This simplifies the actual scaling math later, allowing a fast
|
||
|
// 8x8-bit multiply and taking the MSB. 'brightness' is a uint8_t,
|
||
|
// adding 1 here may (intentionally) roll over...so 0 = max brightness
|
||
|
// (color values are interpreted literally; no scaling), 1 = min
|
||
|
// brightness (off), 255 = just below max brightness.
|
||
|
uint8_t newBrightness = b + 1;
|
||
|
if (newBrightness != brightness) { // Compare against prior value
|
||
|
// Brightness has changed -- re-scale existing data in RAM
|
||
|
uint8_t c,
|
||
|
*ptr = pixels,
|
||
|
oldBrightness = brightness - 1; // De-wrap old brightness value
|
||
|
uint16_t scale;
|
||
|
if (oldBrightness == 0) {
|
||
|
scale = 0; // Avoid /0
|
||
|
} else if (b == 255) {
|
||
|
scale = 65535 / oldBrightness;
|
||
|
} else {
|
||
|
scale = (((uint16_t)newBrightness << 8) - 1) / oldBrightness;
|
||
|
}
|
||
|
for (uint16_t i = 0; i < numBytes; i++) {
|
||
|
c = *ptr;
|
||
|
*ptr++ = (c * scale) >> 8;
|
||
|
}
|
||
|
brightness = newBrightness;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//Return the brightness value
|
||
|
uint8_t tinyNeoPixel::getBrightness(void) const {
|
||
|
return brightness - 1;
|
||
|
}
|
||
|
|
||
|
void tinyNeoPixel::clear() {
|
||
|
memset(pixels, 0, numBytes);
|
||
|
}
|
||
|
|
||
|
// A 32-bit variant of gamma8() that applies the same function
|
||
|
// to all components of a packed RGB or WRGB value.
|
||
|
uint32_t tinyNeoPixel::gamma32(uint32_t x) {
|
||
|
uint8_t *y = (uint8_t *)&x;
|
||
|
// All four bytes of a 32-bit value are filtered even if RGB (not WRGB),
|
||
|
// to avoid a bunch of shifting and masking that would be necessary for
|
||
|
// properly handling different endianisms (and each byte is a fairly
|
||
|
// trivial operation, so it might not even be wasting cycles vs a check
|
||
|
// and branch for the RGB case). In theory this might cause trouble *if*
|
||
|
// someone's storing information in the unused most significant byte
|
||
|
// of an RGB value, but this seems exceedingly rare and if it's
|
||
|
// encountered in reality they can mask values going in or coming out.
|
||
|
for (uint8_t i=0; i<4; i++) y[i] = gamma8(y[i]);
|
||
|
return x; // Packed 32-bit return
|
||
|
}
|
||
|
// *INDENT-ON*
|