@@ -1021,143 +1021,93 @@ void Adafruit_NeoPixel::show(void) {
10211021
10221022// Arduino 101 -----------------------------------------------------------
10231023
1024- PinDescription *pindesc = &g_APinDescription[pin];
1025- uint8_t *p = pixels, *end = p + numBytes;
1026- register uint8_t pix, mask;
1024+ #define NOPx7 { __builtin_arc_nop (); \
1025+ __builtin_arc_nop (); __builtin_arc_nop (); \
1026+ __builtin_arc_nop (); __builtin_arc_nop (); \
1027+ __builtin_arc_nop (); __builtin_arc_nop (); }
10271028
1029+ PinDescription *pindesc = &g_APinDescription[pin];
1030+ register uint32_t loop = 8 * numBytes; // one loop to handle all bytes and all bits
1031+ register uint8_t *p = pixels;
1032+ register uint32_t currByte = (uint32_t ) (*p);
1033+ register uint32_t currBit = 0x80 & currByte;
1034+ register uint32_t bitCounter = 0 ;
1035+ register uint32_t first = 1 ;
1036+
1037+ // The loop is unusual. The very first iteration keeps the wire LOW the whole time -
1038+ // a constant LOW does not affect the NeoPixels, so there is no visible effect.
1039+ // During that very first iteration the CPU caches the instructions in the loop.
1040+ // While the cache is being filled the CPU "slows down". NeoPixel pulses are very time-sensitive,
1041+ // which is why we let the CPU fill its cache first and start the regular pulses from the 2nd iteration.
10281042 if (pindesc->ulGPIOType == SS_GPIO) {
10291043 register uint32_t reg = pindesc->ulGPIOBase + SS_GPIO_SWPORTA_DR;
1030- register uint32_t reg_val = __builtin_arc_lr ((volatile uint32_t )reg);
1044+ uint32_t reg_val = __builtin_arc_lr ((volatile uint32_t )reg);
10311045 register uint32_t reg_bit_high = reg_val | (1 << pindesc->ulGPIOId );
10321046 register uint32_t reg_bit_low = reg_val & ~(1 << pindesc->ulGPIOId );
10331047
1034- while (p < end) {
1035- pix = *p++;
1036- for (mask = 0x80 ; mask; mask >>= 1 ) {
1037- __builtin_arc_sr (reg_bit_high, (volatile uint32_t )reg);
1038- if (pix & mask) {
1039- __builtin_arc_nop ();
1040- __builtin_arc_nop ();
1041- __builtin_arc_nop ();
1042- __builtin_arc_nop ();
1043- __builtin_arc_nop ();
1044- __builtin_arc_nop ();
1045- __builtin_arc_nop ();
1046- __builtin_arc_nop ();
1047- __builtin_arc_nop ();
1048- __builtin_arc_nop ();
1049- __builtin_arc_nop ();
1050- __builtin_arc_nop ();
1051- __builtin_arc_nop ();
1052- __builtin_arc_nop ();
1053- __builtin_arc_nop ();
1054- __builtin_arc_nop ();
1055- __builtin_arc_sr (reg_bit_low, (volatile uint32_t )reg);
1056- __builtin_arc_nop ();
1057- __builtin_arc_nop ();
1058- __builtin_arc_nop ();
1059- __builtin_arc_nop ();
1060- __builtin_arc_nop ();
1061- __builtin_arc_nop ();
1062- __builtin_arc_nop ();
1063- __builtin_arc_nop ();
1064- } else {
1065- __builtin_arc_nop ();
1066- __builtin_arc_nop ();
1067- __builtin_arc_nop ();
1068- __builtin_arc_nop ();
1069- __builtin_arc_nop ();
1070- __builtin_arc_nop ();
1071- __builtin_arc_nop ();
1072- __builtin_arc_nop ();
1073- __builtin_arc_nop ();
1074- __builtin_arc_nop ();
1075- __builtin_arc_sr (reg_bit_low, (volatile uint32_t )reg);
1076- __builtin_arc_nop ();
1077- __builtin_arc_nop ();
1078- __builtin_arc_nop ();
1079- __builtin_arc_nop ();
1080- __builtin_arc_nop ();
1081- __builtin_arc_nop ();
1082- __builtin_arc_nop ();
1083- __builtin_arc_nop ();
1084- __builtin_arc_nop ();
1085- __builtin_arc_nop ();
1086- __builtin_arc_nop ();
1087- __builtin_arc_nop ();
1088- __builtin_arc_nop ();
1089- __builtin_arc_nop ();
1090- __builtin_arc_nop ();
1091- __builtin_arc_nop ();
1092- }
1048+ loop += 1 ; // include first, special iteration
1049+ while (loop--) {
1050+ if (!first) {
1051+ currByte <<= 1 ;
1052+ bitCounter++;
1053+ }
1054+
1055+ // 1 is >550ns high and >450ns low; 0 is 200..500ns high and >450ns low
1056+ __builtin_arc_sr (first ? reg_bit_low : reg_bit_high, (volatile uint32_t )reg);
1057+ if (currBit) { // ~400ns HIGH (740ns overall)
1058+ NOPx7
1059+ NOPx7
1060+ }
1061+ // ~340ns HIGH
1062+ NOPx7
1063+ __builtin_arc_nop ();
1064+
1065+ // 820ns LOW; per spec, max allowed low here is 5000ns
1066+ __builtin_arc_sr (reg_bit_low, (volatile uint32_t )reg);
1067+ NOPx7
1068+ NOPx7
1069+
1070+ if (bitCounter >= 8 ) {
1071+ bitCounter = 0 ;
1072+ currByte = (uint32_t ) (*++p);
10931073 }
1074+
1075+ currBit = 0x80 & currByte;
1076+ first = 0 ;
10941077 }
10951078 } else if (pindesc->ulGPIOType == SOC_GPIO) {
10961079 register uint32_t reg = pindesc->ulGPIOBase + SOC_GPIO_SWPORTA_DR;
1097- register uint32_t reg_val = MMIO_REG_VAL (reg);
1080+ uint32_t reg_val = MMIO_REG_VAL (reg);
10981081 register uint32_t reg_bit_high = reg_val | (1 << pindesc->ulGPIOId );
10991082 register uint32_t reg_bit_low = reg_val & ~(1 << pindesc->ulGPIOId );
11001083
1101- while (p < end) {
1102- pix = *p++;
1103- for (mask = 0x80 ; mask; mask >>= 1 ) {
1104- MMIO_REG_VAL (reg) = reg_bit_high;
1105- if (pix & mask) {
1106- __builtin_arc_nop ();
1107- __builtin_arc_nop ();
1108- __builtin_arc_nop ();
1109- __builtin_arc_nop ();
1110- __builtin_arc_nop ();
1111- __builtin_arc_nop ();
1112- __builtin_arc_nop ();
1113- __builtin_arc_nop ();
1114- __builtin_arc_nop ();
1115- __builtin_arc_nop ();
1116- __builtin_arc_nop ();
1117- __builtin_arc_nop ();
1118- __builtin_arc_nop ();
1119- __builtin_arc_nop ();
1120- __builtin_arc_nop ();
1121- __builtin_arc_nop ();
1122- MMIO_REG_VAL (reg) = reg_bit_low;
1123- __builtin_arc_nop ();
1124- __builtin_arc_nop ();
1125- __builtin_arc_nop ();
1126- __builtin_arc_nop ();
1127- __builtin_arc_nop ();
1128- __builtin_arc_nop ();
1129- __builtin_arc_nop ();
1130- __builtin_arc_nop ();
1131- } else {
1132- __builtin_arc_nop ();
1133- __builtin_arc_nop ();
1134- __builtin_arc_nop ();
1135- __builtin_arc_nop ();
1136- __builtin_arc_nop ();
1137- __builtin_arc_nop ();
1138- __builtin_arc_nop ();
1139- __builtin_arc_nop ();
1140- __builtin_arc_nop ();
1141- __builtin_arc_nop ();
1142- MMIO_REG_VAL (reg) = reg_bit_low;
1143- __builtin_arc_nop ();
1144- __builtin_arc_nop ();
1145- __builtin_arc_nop ();
1146- __builtin_arc_nop ();
1147- __builtin_arc_nop ();
1148- __builtin_arc_nop ();
1149- __builtin_arc_nop ();
1150- __builtin_arc_nop ();
1151- __builtin_arc_nop ();
1152- __builtin_arc_nop ();
1153- __builtin_arc_nop ();
1154- __builtin_arc_nop ();
1155- __builtin_arc_nop ();
1156- __builtin_arc_nop ();
1157- __builtin_arc_nop ();
1158- __builtin_arc_nop ();
1159- }
1084+ loop += 1 ; // include first, special iteration
1085+ while (loop--) {
1086+ if (!first) {
1087+ currByte <<= 1 ;
1088+ bitCounter++;
11601089 }
1090+ MMIO_REG_VAL (reg) = first ? reg_bit_low : reg_bit_high;
1091+ if (currBit) { // ~430ns HIGH (740ns overall)
1092+ NOPx7
1093+ NOPx7
1094+ __builtin_arc_nop ();
1095+ }
1096+ // ~310ns HIGH
1097+ NOPx7
1098+
1099+ // 850ns LOW; per spec, max allowed low here is 5000ns
1100+ MMIO_REG_VAL (reg) = reg_bit_low;
1101+ NOPx7
1102+ NOPx7
1103+
1104+ if (bitCounter >= 8 ) {
1105+ bitCounter = 0 ;
1106+ currByte = (uint32_t ) (*++p);
1107+ }
1108+
1109+ currBit = 0x80 & currByte;
1110+ first = 0 ;
11611111 }
11621112 }
11631113
0 commit comments