An STM32 Arduino IDE Button Debouncing Framework

What follows is a very basic framework for button debouncing. It works on an STM32 and is demonstrated in the following code on an STM32F446RE Nucleo board running at the default 84MHz. It uses timer #2 (TIM2), but could be modified to use another timer. The code was written primarily for the Arduino IDE and STM32duino core, however it could easily be adapted for other environments.

This demo simply uses the built-in Nucleo button, but additional buttons can be added by enlarging the buttons array (increase NUM_BUTTONS and configure each new entry in initButtons()). The comments in the code should be adequate to explain how it works.

This Arduino IDE code requires the HAL TIM module only, so add a “build_opt.h” file with the following definition:

-DHAL_TIM_MODULE_ONLY

The basic code is included in this demonstration file:

#include <cassert>
#include "clamp.h"

#ifdef __cplusplus
extern "C"
#endif

// Number of buttons used (the size of the buttons array).
constexpr size_t NUM_BUTTONS{1};

// Define our button (built in).
// USER_BUTTON is the pin name handed to pinMode(); BLUE_BUTTON is the
// index of that button inside the buttons array.
#define USER_BUTTON PC13
#define BLUE_BUTTON 0

// Debounce specifics (modify per system clock MHz).
// debounce() moves each button's counter by one per timer update and reports
// the button as pressed while the counter is above DEBOUNCE_THESHOLD.
// COUNT_MAX is the upper limit of the counter; COUNT_MIN is only referenced
// by the optional (commented-out) clamp in debounce().
#define DEBOUNCE_THESHOLD 7
#define COUNT_MIN         0
#define COUNT_MAX         15

// Specifics for each defined button.
typedef struct Button 
{
  // GPIO port of the button; nullptr marks an unconfigured slot that
  // debounce() skips.
  GPIO_TypeDef* port = nullptr;
  // HAL pin mask (e.g. GPIO_PIN_13). This is an integer, so it is
  // initialised with 0 rather than the pointer constant NULL.
  uint16_t pin = 0;
  // Debounced state. Written by debounce() from the timer interrupt and
  // read by loop(), hence volatile.
  volatile bool state = false;
  // Set by loop() once a press has been handled, cleared on release, so each
  // press triggers its action only once.
  bool latch = false; 
  // Debounce counter, only touched by debounce().
  uint8_t count = 0; 
} button_t;

// Array of buttons.
button_t buttons[NUM_BUTTONS];


// Debounce functionality called by timer interrupt.
void debounce() 
{
  assert(NUM_BUTTONS != 0);
  
  // Iterate through all defined buttons.
  for (int i=0; i<NUM_BUTTONS; i++)
  {
    // Check for incomplete button defintion.
    if (buttons[i].port == nullptr)
      continue;

    // Read button pin, adjust count.
    if (buttons[i].count != COUNT_MAX && !HAL_GPIO_ReadPin(buttons[i].port, buttons[i].pin))
      buttons[i].count++;
    else if (buttons[i].count)
      buttons[i].count--;

    // For the paranoid, force min/max value on count.
    //buttons[i].count = clamp<uint8_t>(buttons[i].count, COUNT_MIN, COUNT_MAX);

    // Debounce upon count > threshold value.
    if (buttons[i].count > DEBOUNCE_THESHOLD)
      buttons[i].state = true;
    else 
      buttons[i].state = false;
  }
}

// HAL handle for the debounce timer (TIM2), filled in by initTimer().
TIM_HandleTypeDef timerInstance;

// TIM2 interrupt vector: hand the interrupt to the HAL, which in turn calls
// HAL_TIM_PeriodElapsedCallback() on an update event.
extern "C" void TIM2_IRQHandler() { HAL_TIM_IRQHandler(&timerInstance); }

// HAL period-elapsed callback. The HAL invokes this one callback for every
// timer handle, so only run the debouncer when the event came from TIM2.
extern "C" void HAL_TIM_PeriodElapsedCallback(TIM_HandleTypeDef *htim)
{
  if (htim != nullptr && htim->Instance == TIM2)
    debounce();
}

void initTimer() 
{
  // TIM2 by default has clock of 84MHz (MCU freq).
  // Set value of prescaler and period, so update event is every 1ms:
  // Update Event (Hz) = timer_clock/((TIM_Prescaler + 1)*(TIM_Period + 1))
  // Update Event (Hz) = 84MHz / ((0 + 1) * (83999 + 1)) = 1ms.
  __TIM2_CLK_ENABLE();

  timerInstance.Instance = TIM2;
  timerInstance.Init.Period = 83999;
  timerInstance.Init.Prescaler = 0;
  timerInstance.Init.CounterMode = TIM_COUNTERMODE_UP;
  timerInstance.Init.ClockDivision = TIM_CLOCKDIVISION_DIV1;
  timerInstance.Init.RepetitionCounter = 0;
  HAL_NVIC_SetPriority(TIM2_IRQn, 0, 0);
  HAL_NVIC_EnableIRQ(TIM2_IRQn);
  HAL_TIM_Base_Init(&timerInstance);
  HAL_TIM_Base_Start_IT(&timerInstance);
}

void initButtons()
{
  // Define button port/pin and set mode of pin.
  buttons[BLUE_BUTTON].port = GPIOC; 
  buttons[BLUE_BUTTON].pin = GPIO_PIN_13;
  pinMode(USER_BUTTON, INPUT);
}

// Sketch initialisation: configure the LED pin, then the buttons, and finally
// start the debounce timer.
void setup() 
{
  // Make the LED pin an output and drive PA5 low.
  // NOTE(review): presumably PA5 is the pin behind LED_BUILTIN on this board - confirm.
  pinMode(LED_BUILTIN, OUTPUT);
  HAL_GPIO_WritePin(GPIOA, GPIO_PIN_5, GPIO_PIN_RESET);

  // Buttons are set up before the timer is started, so the first timer
  // interrupt already sees a configured buttons array.
  initButtons();
  initTimer();
}

void loop() 
{
  // Detect a new button press.
  if (buttons[BLUE_BUTTON].state && !buttons[BLUE_BUTTON].latch)
  {
    buttons[BLUE_BUTTON].latch = true;
    // Perform action on button press.
    HAL_GPIO_TogglePin(GPIOA, GPIO_PIN_5);
  }

  // Detect button release.
  if (!buttons[BLUE_BUTTON].state && buttons[BLUE_BUTTON].latch)
    buttons[BLUE_BUTTON].latch = false;
}

Contents of “clamp.h” file:

#include <algorithm>

// Return the larger of a and b (a when they compare equal).
template <class T> 
inline const T& _max(const T& a, const T& b) { return (a < b) ? b : a; }

// Return the smaller of a and b (a when they compare equal).
template <class T> 
inline const T& _min(const T& a, const T& b) { return !(b < a) ? a : b; }

// Return val limited to the range [lo, hi].
// val is taken by const reference so that constants, temporaries and const
// objects can be clamped as well as plain variables.
template <typename T>
inline T clamp(const T& val, T lo, T hi) { return _max(lo, _min(hi, val)); }
Posted in Uncategorized | Leave a comment

Arduino stdlib Library Replacement

Excludes memory allocation functions. Compiled from various sources.

Just because.

ctype_.h

// ctype_ header.
#ifndef _CTYPE_H
#define _CTYPE_H

#ifdef __cplusplus
extern "C" {
#endif

/*
 * ASCII character classification. Each function returns non-zero when c
 * belongs to the class and 0 otherwise; values outside 0..127 belong to no
 * class. Clearing bit 0x20 (c & ~0x20) folds lower-case letters onto upper case.
 *
 * The functions are `static inline` so that every translation unit including
 * this header has its own definition. A plain `inline` in C provides no
 * external definition and fails to link whenever a call is not inlined.
 */
static inline int isalnum_(int c) { return ((c >= '0' && c <= '9') || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z')); }
static inline int isalpha_(int c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); }
static inline int iscntrl_(int c) { return ((c & ~0x1f) == 0 || c == 0x7f); }
static inline int isdigit_(int c) { return c >= '0' && c <= '9'; }
static inline int isgraph_(int c) { return (c >= '!' && c <= '~'); }
static inline int islower_(int c) { return (c >= 'a' && c <= 'z'); }
static inline int isprint_(int c) { return (c >= ' ' && c <= '~'); }
static inline int ispunct_(int c) { return ((c >= '!' && c <= '~') && !((c >= '0' && c <= '9') || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'Z'))); }
static inline int isspace_(int c) { return (c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'); }
static inline int isupper_(int c) { return (c >= 'A' && c <= 'Z'); }
static inline int isxdigit_(int c) { return ((c >= '0' && c <= '9') || ((c & ~0x20) >= 'A' && (c & ~0x20) <= 'F')); }

/*
 * Convert an ASCII upper-case letter to lower case; any other value is
 * returned unchanged. `static inline` gives each including translation unit
 * its own definition, so the call always links.
 */
static inline int tolower_(int const c)
{
    if (c >= 'A' && c <= 'Z')
        return c - ('A' - 'a');

    return c;
}

/*
 * Convert an ASCII lower-case letter to upper case; any other value is
 * returned unchanged. `static inline` gives each including translation unit
 * its own definition, so the call always links.
 */
static inline int toupper_(int const c)
{
    if (c >= 'a' && c <= 'z')
        return c - ('a' - 'A');

    return c;
}

/* Non-zero when c is a 7-bit ASCII code (0..0x7f). */
static inline int isascii_(int c) { return (c >= 0x00 && c <= 0x7f); }
/* Non-zero when c is a space or a horizontal tab. */
static inline int isblank_(int c) { return (c == ' ' || c == '\t'); }

#ifdef __cplusplus
}
#endif

#endif

stdio_.h

// stdio_ header.
/*
 * Supplies NULL and size_t to the rest of the library.
 *
 * Both come from the compiler's <stddef.h> instead of being defined as
 * macros here. `#define size_t int` made size_t a signed type and clashed
 * with any standard header, and the unparenthesised `(void *)0` is not a
 * valid NULL for C++ translation units.
 *
 * The include guard avoids the reserved name _STDIO_H, which some C
 * libraries use for their own <stdio.h>.
 */
#ifndef STDIO_UNDERSCORE_H_
#define STDIO_UNDERSCORE_H_

#include <stddef.h>

#endif

stdlib_.h

// stdlib_ header.
#ifndef _STDLIB_H
#define _STDLIB_H

#include "stdio_.h"
#include "string_.h"
#include "ctype_.h"
#include <limits.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Mask applied to the result of rand_(), so results lie in 0.._RAND_MAX.
#define _RAND_MAX 32767
// Size in bytes of the stack buffer qsort_() uses to swap two elements;
// larger elements are swapped in several chunks of this size.
#define MAX_BUF   256 // chunk to copy on swap.

/* Result of div_(): quotient and remainder of an int division. */
typedef struct 
{
  int quot, rem;
} div_t;

/* Result of ldiv_(): quotient and remainder of a long division. */
typedef struct 
{
  long quot, rem;
} ldiv_t;

// Comparison callback type used by qsort_(). It receives pointers to two
// elements; qsort_() only tests whether the result is <= 0.
typedef int compare(const void*, const void*);
// Example of compare function:
// int compare(const void* a, const void* b) {
//   const int* ia = (const int *)a; 
//   const int* ib = (const int *)b;
//   return *ia  - *ib; 
// }
// NOTE(review): the subtraction in the example can overflow for operands of
// opposite sign and large magnitude.

/* Absolute values. */
int abs_(int i);
long labs_(long i);

/* String to number conversions (decimal for the integer variants). */
int atoi_(const char* s);
long atol_(const char* s);
unsigned long atoul_(const char* s);
double atof_(const char* s);

/* Quotient and remainder in one call. */
div_t div_(int numer, int denom);
ldiv_t ldiv_(long numer, long denom);

/* Sort n elements of `size` bytes each, starting at base, ordered by cmp. */
void qsort_(void* base, size_t n, size_t size, compare* cmp);

/* Pseudo-random numbers. */
int rand_(void);
void srand_(unsigned int seed);

/* Number to string conversions; `string` receives the text in base `radix`. */
char* itoa_(int value, char* string, int radix);
char* utoa_(unsigned value, char* string, int radix);
char* ltoa_(long value, char* string, int radix);
char* ultoa_(unsigned long value, char* string, int radix);

/*
 * Overlay of a float and a 32-bit integer, used by ftoa_() to read the raw
 * bits of a float. The typedef makes the plain name `int32_float_t` usable
 * in C as well as C++ (in C a bare union tag is not a type name).
 */
typedef union int32_float_t 
{
  int32_t long_;
  float float_;
} int32_float_t;

// Fallback exponent limits, defined only when nothing else has defined them.
// NOTE(review): not referenced anywhere in the visible code, and the values
// differ from what <float.h> would provide - confirm they are still needed.
#ifndef FLT_MIN_EXP
#define FLT_MIN_EXP (-999)
#endif
#ifndef FLT_MAX_EXP
#define FLT_MAX_EXP (999)
#endif

// Status codes stored through the `status` argument of ftoa_().
// ftoa_() reports TOO_LARGE when the binary exponent is 31 or more and
// TOO_SMALL when it is below -23.
#define _FTOA_TOO_LARGE -2  // |input| > 2147483520 
#define _FTOA_TOO_SMALL -1  // |input| < 0.0000001 

// Maximum number of fractional digits ftoa_() generates before trailing
// zeroes are removed.
// precision 0-9
#define PRECISION 7

// Write the decimal text of f to p; *status receives 0 or one of the
// _FTOA_ codes above.
void ftoa_(float f, char *p, int *status);

#ifdef __cplusplus
}
#endif

#endif

stdlib_.c

#include "stdlib_.h"

#ifdef __cplusplus
extern "C" {
#endif

// The seed: generator state updated by rand_() and replaced by srand_().
// It starts at 1.
unsigned long _Randseed = 1;

// Return the magnitude of an int (negative values are negated).
int abs_(int i)
{
    if (i < 0)
        return -i;

    return i;
}

// Return the magnitude of a long (negative values are negated).
long labs_(long i)
{
    if (i < 0)
        return -i;

    return i;
}

// Convert a string to an unsigned long integer.
//
// nptr:   text to parse. Leading white space, an optional sign and (for
//         base 0 or 16) an optional 0x/0X prefix are accepted.
// endptr: when non-null, receives a pointer to the first character that was
//         not consumed, or nptr itself if no digit was converted.
// base:   2..36, or 0 to select 16/8/10 from the prefix.
//
// Returns the converted value. On overflow ULONG_MAX is returned; a leading
// '-' yields the value negated in unsigned arithmetic.
unsigned long _Stoul(const char* nptr, char** endptr, int base)
{
    const char* s = nptr;
    unsigned long acc = 0;
    unsigned long cutoff;
    int cutlim;
    int c;
    int neg = 0;
    int any = 0;

    // Skip leading white space.
    do {
        c = *s++;
    } while (isspace_(c));

    // Optional sign.
    if (c == '-')
    {
        neg = 1;
        c = *s++;
    }
    else if (c == '+')
        c = *s++;

    // Optional hexadecimal prefix.
    if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X'))
    {
        c = s[1];
        s += 2;
        base = 16;
    }
    
    if (base == 0)
        base = (c == '0') ? 8 : 10;
    
    // acc may still take another digit c when acc < cutoff, or when
    // acc == cutoff and c <= cutlim.
    cutoff = (unsigned long)ULONG_MAX / (unsigned long)base;
    cutlim = (int)((unsigned long)ULONG_MAX % (unsigned long)base);

    for (;; c = *s++)
    {
        if (isdigit_(c))
            c -= '0';
        else if (isalpha_(c))
            c -= isupper_(c) ? 'A' - 10 : 'a' - 10;
        else
            break;

        if (c >= base)
            break;

        // Once overflow is detected (any < 0) the remaining digits are
        // still consumed but no longer accumulated.
        if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
            any = -1;
        else
        {
            any = 1;
            acc *= base;
            acc += c;
        }
    }

    if (any < 0)
    {
        acc = ULONG_MAX;
        //errno = ERANGE;
    }
    else if (neg)
        acc = 0UL - acc; // unsigned negation: well defined for every value

    if (endptr != 0)
        *endptr = (char*)(any ? s - 1 : nptr);

    return (acc);
}

// Parse a decimal string and return the value as int.
int atoi_(const char* s)
{
    unsigned long parsed = _Stoul(s, NULL, 10);

    return (int)parsed;
}

// Parse a decimal string and return the value as long.
long atol_(const char* s)
{
    unsigned long parsed = _Stoul(s, NULL, 10);

    return (long)parsed;
}

// Parse a decimal string and return the value as unsigned long.
unsigned long atoul_(const char* s)
{
    unsigned long parsed = _Stoul(s, NULL, 10);

    return parsed;
}

// Convert string to double.
//
// Accepts optional leading white space, an optional sign, decimal digits
// with an optional fractional part, and an optional exponent
// (e/E, optional sign, digits). Parsing stops at the first character that
// does not fit; text without any digits yields 0.0.
double atof_(const char* s)
{
    double a = 0.0;
    int e = 0;
    int c;
    int negative = 0;

    // Skip leading white space.
    do {
        c = *s++;
    } while (isspace_(c));

    // Optional sign. Without this, input such as "-1.5" stopped at the
    // '-' and converted to 0.
    if (c == '-')
    {
        negative = 1;
        c = *s++;
    }
    else if (c == '+')
        c = *s++;

    // Integer part.
    for (; isdigit_(c); c = *s++)
        a = a * 10.0 + (c - '0');

    // Fractional part: keep accumulating digits and remember how many
    // decimal places have to be undone afterwards.
    if (c == '.')
    {
        for (c = *s++; isdigit_(c); c = *s++)
        {
            a = a * 10.0 + (c - '0');
            e = e - 1;
        }
    }

    // Exponent.
    if (c == 'e' || c == 'E')
    {
        int sign = 1;
        int i = 0;

        c = *s++;
        if (c == '+')
            c = *s++;
        else if (c == '-')
        {
            c = *s++;
            sign = -1;
        }
    
        while (isdigit_(c))
        {
            i = i * 10 + (c - '0');
            c = *s++;
        }
        
        e += i * sign;
    }

    // Apply the decimal exponent.
    while (e > 0)
    {
        a *= 10.0;
        e--;
    }

    while (e < 0)
    {
        a *= 0.1;
        e++;
    }

    return negative ? -a : a;
}

// Convert a long to text in the given radix.
//
// value:  number to convert. A minus sign is produced only for radix 10;
//         in any other radix a negative value is shown as its unsigned
//         bit pattern.
// string: destination buffer; must be large enough for the digits, the
//         optional sign and the terminating NUL.
// radix:  2..36; digits above 9 are written as lower-case letters.
//
// Returns string, or 0 when string is NULL or radix is out of range.
char* ltoa_(long value, char* string, int radix)
{
    // One slot per bit of the widest value, so radix 2 fits whatever the
    // width of long is on the target.
    char tmp[sizeof(unsigned long) * CHAR_BIT + 1];
    char* tp = tmp;
    unsigned long digit;
    unsigned long v;
    int sign;
    char* sp;

    if (string == NULL)
        return 0;

    if (radix > 36 || radix <= 1)
        return 0;

    sign = (radix == 10 && value < 0);
    if (sign)
        v = 0UL - (unsigned long)value; // magnitude without overflowing on LONG_MIN
    else
        v = (unsigned long)value;

    // Produce the digits least significant first (at least one digit).
    while (v || tp == tmp)
    {
        digit = v % radix;
        v = v / radix;
        
        if (digit < 10)
            *tp++ = (char)(digit + '0');
        else
            *tp++ = (char)(digit + 'a' - 10);
    }

    sp = string;

    if (sign)
        *sp++ = '-';
        
    // Copy the digits out in reverse, i.e. most significant first.
    while (tp > tmp)
        *sp++ = *--tp;
    
    *sp = 0;

    return string;
}

// Convert an unsigned long to text in the given radix.
//
// value:  number to convert.
// string: destination buffer; must be large enough for the digits and the
//         terminating NUL.
// radix:  2..36; digits above 9 are written as lower-case letters.
//
// Returns string, or 0 when string is NULL or radix is out of range.
char* ultoa_(unsigned long value, char* string, int radix)
{
    // One slot per bit of the value, so radix 2 fits whatever the width of
    // unsigned long is on the target.
    char tmp[sizeof(unsigned long) * CHAR_BIT + 1];
    char* tp = tmp;
    unsigned long digit;
    unsigned long v = value;
    char* sp;

    if (string == NULL)
        return 0;

    if (radix > 36 || radix <= 1)
        return 0;

    // Produce the digits least significant first (at least one digit).
    while (v || tp == tmp)
    {
        digit = v % radix;
        v = v / radix;
        
        if (digit < 10)
            *tp++ = (char)(digit + '0');
        else
            *tp++ = (char)(digit + 'a' - 10);
    }

    sp = string;

    // Copy the digits out in reverse, i.e. most significant first.
    while (tp > tmp)
        *sp++ = *--tp;
    
    *sp = 0;

    return string;
}

// Convert an int to text in the given radix (see ltoa_ for the rules).
// For radix 10 the value is treated as signed. For any other radix it is
// converted as an unsigned int, so a negative value is shown with the width
// of int rather than being sign-extended to the width of long.
char* itoa_(int value, char* string, int radix)
{
    if (radix == 10)
        return ltoa_(value, string, radix);

    return ultoa_((unsigned int)value, string, radix);
}

// Convert an unsigned int to text in the given radix (see ultoa_).
char* utoa_(unsigned value, char* string, int radix) { return ultoa_(value, string, radix); }

// Sort (char base[size])[n] using quicksort.
void qsort_(void* base, size_t n, size_t size, compare* cmp)
{  
    while (1 < n)
    {
        // worth sorting.
        size_t i = 0;
        size_t j = n - 1;
        char* qi = (char*)base;
        char* qj = qi + size * j;
        char* qp = qj;

        while (i < j)
        {
            // partition about pivot.
            while (i < j && (*cmp)(qi, qp) <= 0)
                ++i, qi += size;
            
            while (i < j && (*cmp)(qp, qj) <= 0)
                --j, qj -= size;
            
            if (i < j)
            { 
                // swap elements i and j.
                char buf[MAX_BUF];
                char* q1 = qi;
                char* q2 = qj;
                size_t m, ms;

                for (ms = size; 0 < ms; ms -= m, q1 += m, q2 += m)
                { 
                    // swap as many as possible.
                    m = ms < sizeof(buf) ? ms : sizeof(buf);
                    memcpy_(buf, q1, m);
                    memcpy_(q1, q2, m);
                    memcpy_(q2, buf, m);
                }
                
                ++i, qi += size;
            }
        }

        if (qi != qp)
        { 
            // swap elements i and pivot.
            char buf[MAX_BUF];
            char* q1 = qi;
            char* q2 = qp;
            size_t m, ms;

            for (ms = size; 0 < ms; ms -= m, q1 += m, q2 += m)
            { 
                // swap as many as possible.
                m = ms < sizeof(buf) ? ms : sizeof(buf);
                memcpy_(buf, q1, m);
                memcpy_(q1, q2, m);
                memcpy_(q2, buf, m);
            }
        }

        j = n - i - 1, qi += size;
        
        if (j < i)
        { 
            // recurse on smaller partition.
            if (1 < j)
                qsort_(qi, j, size, cmp);
            
            n = i;
        }
        else
        { 
            // lower partition is smaller.
            if (1 < i)
                qsort_(base, i, size, cmp);

            base = qi;
            
            n = j;
        }
    }
}

// Advance the generator state by one step and return a value in
// 0.._RAND_MAX taken from the state shifted right by 16 bits.
int rand_(void)
{ 
    unsigned int upper;

    _Randseed = _Randseed * 1103515245 + 12345;
    upper = (unsigned int)(_Randseed >> 16);

    return (upper & _RAND_MAX);
}

// Replace the generator state used by rand_() with the given seed.
void srand_(unsigned int seed)
{
    _Randseed = seed;
}

// Compute int quotient and remainder.
//
// The quotient is truncated towards zero and the remainder has the sign of
// numer, so that numer == denom * quot + rem. denom must not be zero.
div_t div_(int numer, int denom)
{ 
    div_t val;

    val.quot = numer / denom;
    val.rem = numer - denom * val.quot;

    // Only adjust when the division rounded towards negative infinity, which
    // shows up as a non-zero remainder whose sign differs from numer. With
    // truncating division (C99 and later) this never triggers. The previous
    // test (quot < 0 && rem > 0) also fired for e.g. 7 / -2 and then
    // returned {-2, 3} instead of {-3, 1}.
    if (val.rem != 0 && ((val.rem < 0) != (numer < 0)))
    { 
        val.quot += 1;
        val.rem -= denom;
    }

    return (val);
}

// Compute long quotient and remainder.
//
// The quotient is truncated towards zero and the remainder has the sign of
// numer, so that numer == denom * quot + rem. denom must not be zero.
ldiv_t ldiv_(long numer, long denom)
{
    ldiv_t val;

    val.quot = numer / denom;
    val.rem = numer - denom * val.quot;

    // Only adjust when the division rounded towards negative infinity, which
    // shows up as a non-zero remainder whose sign differs from numer. With
    // truncating division (C99 and later) this never triggers. The previous
    // test (quot < 0 && rem > 0) also fired for e.g. 7 / -2 and then
    // returned {-2, 3} instead of {-3, 1}.
    if (val.rem != 0 && ((val.rem < 0) != (numer < 0)))
    {
        val.quot += 1;
        val.rem -= denom;
    }

    return (val);
}

void ftoa_(float f, char *p, int *status) 
{
  int32_t mantissa, intPart, fracPart;
  int16_t exp2;
  int32_float_t x;

  *status = 0;
  if (f == 0.0) 
  {
    *p++ = '0';
    *p++ = '.';
    *p++ = '0';
    *p = 0;
    return;
  }
  x.float_ = f;
  exp2 = (unsigned char)(x.long_ >> 23) - 127;
  mantissa = (x.long_ & 0xFFFFFF) | 0x800000;
  fracPart = 0;
  intPart = 0;

  if (exp2 >= 31) 
  {
    *status = _FTOA_TOO_LARGE;
    return;
  } 
  else if (exp2 < -23) 
  {
    *status = _FTOA_TOO_SMALL;
    return;
  }
  else if (exp2 >= 23) 
    intPart = mantissa << (exp2 - 23);
  else if (exp2 >= 0) 
  {
    intPart = mantissa >> (23 - exp2);
    fracPart = (mantissa << (exp2 + 1)) & 0xFFFFFF;
  } 
  else 
  {
    //if (exp2 < 0)
    fracPart = (mantissa & 0xFFFFFF) >> -(exp2 + 1);
  }

  if (x.long_ < 0)
      *p++ = '-';
  
  if (intPart == 0)
    *p++ = '0';
  else 
  {
    ltoa_(intPart, p, 10);
    while (*p)
      p++;
  }
  
  *p++ = '.';
  
  if (fracPart == 0)
    *p++ = '0';
  else 
  {
    char m;

    for (m=0; m<PRECISION; m++) 
    {
      //fracPart *= 10;
      fracPart = (fracPart << 3) + (fracPart << 1); 
      *p++ = (fracPart >> 24) + '0';
      fracPart &= 0xFFFFFF;
    }

    // Delete trailing zeroes.
    for (--p; p[0] == '0' && p[-1] != '.'; --p);
    ++p;
  }
  
  *p = 0;
}

#ifdef __cplusplus
}
#endif
Posted in Uncategorized | Leave a comment

Arduino Inline Assembly string Library

This is a refactoring of the existing Arduino (AVR) C string library into inline assembly, done simply as an academic exercise. The code should compile to the same size, since in most respects it is the same code. Most of the comments were copied from the original source code. Note that the function names have an underscore appended to them. Beware: some of the functions have not been tested, so please verify their behavior before using any of this code.

string_.h

// string_ header
/*
 * Declarations of the underscore-suffixed string/memory functions.
 *
 * - <stddef.h> is included because the prototypes use size_t.
 * - The include guard avoids the reserved name _STRING_H, which some C
 *   libraries use for their own <string.h>.
 * - strlen_ is an ordinary function: its definition lives in string_.c (so
 *   `inline` does not apply) and contains no return instruction of its own,
 *   so it must not be declared `naked`. memmove_ stays naked because its
 *   body performs the return itself.
 */
#ifndef STRING_UNDERSCORE_H_
#define STRING_UNDERSCORE_H_

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

void* memchr_(const void* s, int c, size_t n);
int memcmp_(const void* s1, const void* s2, size_t n);
void* memcpy_(void* s1, const void* s2, size_t n);
void* memmove_(void* dst, const void* src, size_t n) __attribute__ ((naked)); 
void* memset_(void* s, int c, size_t n);
void* memmem_(const void* s1, size_t len1, const void* s2, size_t len2);
char* strcat_(char* s1, const char* s2);
char* strchr_(const char* s, int c);
int strcmp_(const char* s1, const char* s2);
char* strcpy_(char* s1, const char* s2);
size_t strcspn_(const char* s1, const char* s2);
size_t strlen_(const char* s);
char* strncat_(char* s1, const char* s2, size_t n);
int strncmp_(const char* s1, const char* s2, size_t n);
char* strncpy_(char* s1, const char* s2, size_t n);
char* strpbrk_(const char* s1, const char* s2);
char* strrchr_(const char* s, int c);
size_t strspn_(const char* s1, const char* s2);
char* strstr_(const char* s1, const char* s2);
char* strtok_r_(char* s, const char* delim, char** last);
char* strtok_(char* s1, const char* s2);
char* _strtok(char *s, const char *delim);
int strncasecmp_(const char* s1, const char* s2, size_t n);
int strcasecmp_(const char* s1, const char* s2);  
char* strchrnul_(const char* s, int c);
char* strlwr_(char* s);
char* strupr_(char* s);
void* strrev_(char *s);
size_t strnlen_(const char* s, size_t maxlen);
char* strsep_(char **stringp, const char *delim);
char* strdup_(const char *s1);
size_t strlcpy_(char *dst, const char *src, size_t siz);
size_t strlcat_(char *dst, const char *src, size_t siz);

#ifdef __cplusplus
}
#endif
#endif

string_.c

#include "string_.h"

#ifdef __cplusplus
extern "C" {
#endif

// Find first occurrence of c in s[n].
//
// Walks at most n bytes starting at s (through Z) and compares each byte with
// the register holding c. Returns a pointer to the first matching byte, or a
// null pointer when the count runs out first.
//
// NOTE(review): the "found" path executes `ret` inside the asm, leaving the
// function without passing through any compiler-generated epilogue, and the
// asm modifies n and Z although both are declared as inputs - confirm this is
// safe with the compiler and options in use.
void* memchr_(const void* s, int c, size_t n)
{
  register char* ret asm("r24");

  __asm__ __volatile__ (
    // 1: count down n; carry set means all n bytes have been examined.
    "1:                  \n"
    "subi  %A1, 1        \n"
    "sbci  %B1, 0        \n"
    "brcs  2f            \n"
    "ld  __tmp_reg__, Z+ \n"
    "cp  __tmp_reg__, %3 \n"
    "brne 1b             \n"
    // match: undo the post-increment and return that address.
    "sbiw  ZL, 1         \n"
    "movw  %0, ZL        \n"
    "ret                 \n"
    // 2: not found, result is a null pointer.
    "2:                  \n"
    "clr %A0             \n"
    "clr %B0             \n"
    : "=r" (ret) : "r" (n), "z" (s), "r" (c) : "memory"
  );
  
  return ret;
}


// Compare unsigned char s1[n], s2[n].
//
// Reads s1 through X and s2 through Z, one byte at a time, for at most n
// bytes. Returns 0 when all n bytes are equal. Otherwise the low byte of the
// result is the difference of the first mismatching pair (s1 byte minus s2
// byte) and the high byte is 0xFF or 0 according to the borrow of that
// subtraction.
//
// NOTE(review): the asm modifies n, X and Z although they are declared as
// inputs - confirm the compiler does not rely on them afterwards.
int memcmp_(const void* s1, const void* s2, size_t n) 
{
  register int ret asm("r24");

  __asm__ __volatile__ (
    "rjmp 2f              \n"
    "1:                   \n"
    "ld   %0, X+          \n"
    "ld   __tmp_reg__, Z+ \n"
    "sub  %0, __tmp_reg__ \n"
    "brne 3f              \n"
    // 2: count down n; keep comparing while no borrow occurred.
    "2:                   \n"
    "subi %A1, 1          \n"
    "sbci %B1, 0          \n"
    "brcc 1b              \n"
    // strings are equal, clear both r24 and carry
    "sub  %A0, %A0        \n"
    // 3: high byte = 0xFF when carry is set, else 0.
    "3:                   \n"
    "sbc   %B0, %B0       \n"
    : "=r" (ret) : "r" (n), "z" (s2), "x" (s1)
  );
  
  return ret;
}


// Copy char s2[n] to s1[n] in any order.
//
// Copies n bytes from src (read through Z) to dst (written through X), in
// ascending address order. The block inside the comment below is an
// alternative, speed-optimised loop that is currently disabled.
//
// NOTE(review): the function is declared to return void* but has no return
// statement; callers that use the result presumably rely on dst still being
// in the return register - confirm. The asm also modifies n, X and Z although
// they are declared as inputs.
void* memcpy_(void* dst, const void* src, size_t n)
{
  __asm__ __volatile__ (
    // 11 words, (13 + len * 8) cycles
    "rjmp 2f              \n"
    "1:                   \n"
    "ld   __tmp_reg__, Z+ \n"
    "st   X+, __tmp_reg__ \n"
    // 2: count down n; copy another byte while no borrow occurred.
    "2:                   \n"
    "subi %A0, 1          \n"
    "sbci %B0, 0          \n"
    "brcc 1b              \n"
      : : "r" (n), "z" (src), "x" (dst) : "memory"
  );
/*  
  __asm__ __volatile__ (
    // if OPTIMIZE_SPEED
    //15 words, (14 + len * 6 - (len & 1)) cycles
    "sbrs %0, 0           \n"
    "rjmp 3f              \n"
    "rjmp 2f              \n"
    "1:                   \n"
    "ld   __tmp_reg__, Z+ \n"
    "st   X+, __tmp_reg__ \n"
    "2:                   \n"
    "ld   __tmp_reg__, Z+ \n"
    "st   X+, __tmp_reg__ \n"
    "3:                   \n"
    "subi %A0, 2          \n"
    "sbci %B0, 0          \n"
    "brcc 1b              \n"
    : : "r" (n), "z" (src), "x" (dst) : "memory"
  );
*/  
}


// Copy char src[n] to dst[n] safely.
//
// Declared with __attribute__((naked)) in string_.h, so the asm below is the
// whole function: it reads its arguments from fixed registers (dst in
// r25:r24, src in r23:r22, len in r21:r20, as the inline comments state) and
// returns by itself, either with `ret` or by jumping to memcpy_.
//
// NOTE(review): the fixed registers presumably match the avr-gcc calling
// convention for three 16-bit arguments - confirm for the target in use.
void* memmove_(void* dst, const void* src, size_t n)
{
  // if src < dest, we have to copy in reverse order
  // otherwise memcpy will do the right thing
  __asm__ __volatile__ (
    "cp   r22, r24 \n" // src, dst
    "cpc  r23, r25 \n"
    "brcc 3f       \n"
    
    // src < dst: point Z and X one past the end of each area.
    "movw ZL, r22  \n" // src
    "movw XL, r24  \n" // dst
    "add  ZL, r20  \n" // len
    "adc  ZH, r21  \n"
    "add  XL, r20  \n"
    "adc  XH, r21  \n"
    "rjmp 2f       \n"
    
    // 1: copy one byte, moving downwards.
    "1:                   \n"
    "ld   __tmp_reg__, -Z \n"
    "st   -X, __tmp_reg__ \n"
    
    "2:           \n"
    "subi r20, 1  \n" // len
    "sbci r21, 0  \n"
    "brcc 1b      \n"
    // return dest (unchanged)
    "ret          \n"
    
    // 3: src >= dst, a forward copy is safe.
    "3:           \n"
    "rjmp memcpy_ \n"
  );
}


// Store c throughout unsigned char s[n].
//
// Writes the register holding c to n consecutive bytes starting at s
// (through X). The block inside the comment below is an alternative,
// speed-optimised loop that is currently disabled.
//
// NOTE(review): the function is declared to return void* but has no return
// statement; callers that use the result presumably rely on s still being in
// the return register - confirm. The asm modifies n and X although they are
// declared as inputs, and it has no "memory" clobber although it writes
// memory.
void* memset_(void* s, int c, size_t n)
{
/*
  __asm__ __volatile__ (
    // if OPTIMIZE_SPEED
    // 11 words, (12 + len * 4 - (len & 1)) cycles
    "sbrs %0, 0  \n" // n
    "rjmp 3f     \n"
    "rjmp 2f     \n"
    "1:          \n"
    "st   X+, %1 \n" // c
    "2:          \n"
    "st   X+, %1 \n  // c
    "3:          \n"
    "subi %A0, 2 \n" // n
    "sbci %B0, 0 \n"
    "brcc 1b     \n"
    : : "r" (n), "r" (c), "x" (s)
  );
*/
  __asm__ __volatile__ (
    // 8 words, (11 + len * 6) cycles
    "rjmp 2f     \n"
    "1:          \n"
    "st   X+, %1 \n"
    // 2: count down n; store another byte while no borrow occurred.
    "2:          \n"
    "subi %A0, 1 \n" 
    "sbci %B0, 0 \n"
    "brcc 1b     \n"
    : : "r" (n), "r" (c), "x" (s)
  );
}

// Find start of first occurrence of substring s2 of length len2 in memory area s1 of length len1.
//
// Operand map of the asm below, in constraint order:
//   %0 len1, %1 len2, %2 beg2, %3 c1, %4 _s1, %5 _s2, %6 ret.
// The asm first turns len1/len2 into end addresses, then scans s1 for the
// first byte of s2 and, on a hit, compares the remaining bytes.
//
// NOTE(review): several things here need confirming before use:
//  - _s1, _s2 and ret are never assigned from s1/s2 in C; the code
//    presumably relies on the arguments already being in r24/r20.
//  - every operand is declared as an input although the asm writes most of
//    them, and there is no return statement.
//  - .L_ret/.L_match/.L_nomatch are named labels rather than numeric local
//    labels, so they would clash if the asm were ever emitted twice.
void* memmem_(const void* s1, size_t len1, const void* s2, size_t len2)
{
  register void* _s1 asm("r24");
  register void* _s2 asm("r20");
  register void* ret asm("r24");
  register uint8_t beg2 asm("r17"); // begin of s2: s2[0]
  register uint8_t c1 asm("r16");   // char from s1[]
  //__tmp_reg__ char from s2[]: tuned for classic lpm instr.

  __asm__ __volatile__ (
    "cp   %A1, __zero_reg__ \n"
    "cpc  %B1, __zero_reg__ \n"
    "breq .L_ret \n"     // s2[] is empty
  
    "add  %A1, %A5 \n"   // len2 = &(s2[len2])
    "adc  %B1, %B5 \n"
    "add  %A0, %A4 \n"   // len1 = &(s1[len1])
    "adc  %B0, %B4 \n"

    "movw ZL, %5 \n"
    "ld   %2, Z+ \n"     // beg2 = s2[0]
    "movw %5, ZL \n"     // save: address of s2[1]
  
    "1: \n" 
    "movw XL, %A6 \n"    // goto to begin of s1[]

    "2: \n"
    "cp   XL, %A0 \n"    // find first char that is matched
    "cpc  XH, %B0 \n"
    "brsh .L_nomatch \n"
    "ld   %3, X+ \n"
    "cp   %3, %2 \n"
    "brne 2b \n"

    "movw %A6, XL \n"    // store address

    "movw ZL, %5 \n"
    "3: \n" 
    "cp   ZL, %A1 \n"    // compare strings
    "cpc  ZH, %B1 \n"
    "brsh .L_match \n"   // end of s2[] --> OK
    "cp   XL, %A0 \n"
    "cpc  XH, %B0 \n"
    "brsh .L_nomatch \n" // s1[] tail is too short
    "ld   %3, X+ \n"
    "ld   __tmp_reg__, Z+ \n"
    "cp   %3, __tmp_reg__ \n"
    "breq 3b \n"
    "rjmp 1b \n"         // no equal
  
    ".L_nomatch: \n"
    "ldi  %A6, 1 \n"
    "ldi  %B6, 0 \n"
    ".L_match: \n"
    "sbiw %A6, 1 \n"     // restore after post-increment
    ".L_ret: \n"
     : : "r" (len1), "r" (len2), "r" (beg2), "r" (c1), "r" (_s1), "r" (_s2), "r" (ret) 
  );
}


// copy char s2[] to end of s1[].
//
// Advances X over s1 up to its terminating NUL, then copies s2 (read through
// Z) from that position on, including s2's terminating NUL.
//
// NOTE(review): the function is declared to return char* but has no return
// statement; callers that use the result presumably rely on s1 still being
// in the return register - confirm. The asm modifies X and Z although they
// are declared as inputs, and has no "memory" clobber.
char* strcat_(char* s1, const char* s2)
{
  __asm__ __volatile__ (
    // 1: find the end of s1.
    "1:                   \n"
    "ld   __tmp_reg__, X+ \n"
    "tst  __tmp_reg__     \n"
    "brne 1b              \n"
    "sbiw XL, 1           \n"
    // 2: copy s2 up to and including its NUL.
    "2:                   \n"
    "ld   __tmp_reg__, Z+ \n"
    "st   X+, __tmp_reg__ \n"
    "tst  __tmp_reg__     \n"
    "brne 2b              \n"
    : : "z" (s2), "x" (s1)
  );
}


// find first occurrence of c in char s[].
//
// Reads s through Z, comparing each byte (including the terminating NUL)
// with the register holding c. Returns a pointer to the first match, or a
// null pointer when the NUL is reached without a match.
//
// NOTE(review): the "not found" path executes `ret` inside the asm, leaving
// the function without passing through any compiler-generated epilogue, and
// Z is modified although declared as an input - confirm this is safe.
char* strchr_(const char* s, int c)
{
  register char* ret asm("r24");
  
  __asm__ __volatile__ (
    "1: \n"
    "ld   %0, Z+ \n"
    "cp   %0, %2 \n"
    "breq 2f \n"
    "tst  %A0 \n"   
    "brne 1b \n"
    // not found, return NULL pointer
    "clr  %B0 \n"   
    "ret \n"
    // 2: found; undo the post-increment and return that address.
    "2: \n"
    "sbiw ZL, 1 \n" 
    "movw %0, ZL \n"
      : "=r" (ret) : "z" (s), "r" (c)
  );
  
  return ret; 
}


// compare unsigned char s1[], s2[].
//
// Reads s1 through X and s2 through Z. The loop continues while the two
// bytes are equal and the s2 byte is not NUL. The low byte of the result is
// the difference of the last pair read (s1 byte minus s2 byte), so it is 0
// for equal strings; the high byte is 0xFF or 0 according to the borrow of
// that subtraction.
//
// NOTE(review): X and Z are modified although declared as inputs - confirm
// the compiler does not rely on them afterwards.
int strcmp_(const char* s1, const char* s2)
{
  register int ret asm("r24");
  
  __asm__ __volatile__ (
    //movw ZL, r22 //s2
    //movw XL, r24 //s1
    "1: \n"
    "ld   %0, X+ \n"
    "ld   __tmp_reg__, Z+ \n"
    "sub  %0, __tmp_reg__ \n"
    // skip the loop branch when the s2 byte is NUL.
    "cpse __tmp_reg__, __zero_reg__ \n"
    "breq 1b \n"
    // ret_hi = SREG.C ? 0xFF : 0
    "sbc  %B0, %B0 \n"
      : "=r" (ret) : "z" (s2), "x" (s1)
  );
    
  return ret;
}


// copy char s2[] to s1[].
//
// Copies bytes from s2 (read through Z) to s1 (written through X) up to and
// including the terminating NUL.
//
// NOTE(review): the function is declared to return char* but has no return
// statement; callers that use the result presumably rely on s1 still being
// in the return register - confirm. The asm modifies X and Z although they
// are declared as inputs, and has no "memory" clobber.
char* strcpy_(char* s1, const char* s2)
{
  __asm__ __volatile__ (
    // 9 words, (14 + strlen(src) * 7) cycles
    "1:                   \n"
    "ld   __tmp_reg__, Z+ \n"
    "st   X+, __tmp_reg__ \n"
    "tst  __tmp_reg__     \n"
    "brne 1b              \n"
    : : "z" (s2), "x" (s1)
  );
}


// find index of first s1[i] that matches any s2[].
//
// For every byte of s1 (read through X) the reject string is scanned from
// its start (through Z); scanning of s1 stops at the first byte found in the
// reject string or at s1's terminating NUL. The result is computed as
// X - 1 - s1.
//
// NOTE(review): several things here need confirming before use:
//  - s2 is not an asm operand; Z is reloaded from the fixed pair r23:r22,
//    presumably the register the second argument arrives in.
//  - the final com/add sequence reads %0 (r25:r24) although n is declared as
//    output-only; it presumably still holds s1 at that point.
//  - there is no return statement; the result is left in n (r24).
size_t strcspn_(const char* s1, const char* s2)
{
  register size_t n asm("r24");
  register uint8_t chs asm("r21");
  
  __asm__ __volatile__(
    // get next symbol from s1[]
    "1:                             \n"
    "ld   %1, X+                    \n"   // *s++
    "tst  %1                        \n"
    "breq 3f                        \n"
    // check an occurrence
    "movw ZL, r22                   \n" 
    "2:                             \n"
    "ld   __tmp_reg__, Z+           \n"
    "cp   __tmp_reg__, %1           \n"
    "cpse __tmp_reg__, __zero_reg__ \n"
    "brne 2b                        \n"
      // branch if chs isn't present in reject[]
    "brne  1b                       \n"
    // end of scanning: chs is found or end of s[] is reached
    // Return: X - 1 - str == X + ~str
    "3:                             \n"
    "com  %A0                       \n"
    "com  %B0                       \n"
    "add  %A0, XL                   \n"
    "adc  %B0, XH                   \n"
    : "=r" (n), "=r" (chs) : "x" (s1) //, "z" (s2)
  );
}


// find length of s[].
//
// Advances Z over s until the terminating NUL has been read, then computes
// the length as Z - 1 - s.
//
// NOTE(review): several things here need confirming before use:
//  - string_.h declares this function `inline` and `naked`, yet the body
//    contains no return instruction of its own and uses asm operands.
//  - the com/add sequence reads %0 (r25:r24) although len is declared as
//    output-only; it presumably still holds s at that point.
//  - there is no return statement; the result is left in len (r24).
size_t strlen_(const char* s)
{
  register size_t len asm("r24");
  
  __asm__ __volatile__(
    // 10 words, (14 + strlen(src) * 5) cycles
    "1:                   \n"
    "ld   __tmp_reg__, Z+ \n"
    "tst  __tmp_reg__     \n"
    "brne 1b              \n"
    // Z points one character past the terminating NUL
    // return Z - 1 - src = (-1 - src) + Z = ~src + Z
    "com %A0              \n" 
    "com %B0              \n" 
    "add %A0, ZL          \n" 
    "adc %B0, ZH          \n" 
    : "=r" (len) : "z" (s)
  );
}


// copy char s2[max n] to end of s1[].
//
// Advances X to the terminating NUL of dst, then appends at most len bytes
// of src (read through Z). Copying stops after src's NUL has been copied; if
// len runs out first, a NUL is stored after the last copied byte.
//
// NOTE(review): several things here need confirming before use:
//  - len is declared as an output-only operand although the asm reads and
//    decrements it.
//  - the path that copies src's NUL executes `ret` inside the asm, bypassing
//    any compiler-generated epilogue.
//  - there is no return statement; the result presumably relies on dst still
//    being in the return register.
char* strncat_(char *dst, const char *src, size_t len)
{
  __asm__ __volatile__(
    // 1: find the end of dst.
    "1:                   \n"
    "ld   __tmp_reg__, X+ \n"
    "tst  __tmp_reg__     \n"
    "brne 1b              \n"
    "sbiw XL, 1           \n"  // undo post-increment (point the the NUL)
    // 2: count down len, then copy one byte.
    "2:                   \n"
    "subi %A0, 1          \n" // len
    "sbci %B0, 0          \n"
    "brcs 3f              \n"
    "ld   __tmp_reg__, Z+ \n"
    "tst  __tmp_reg__     \n"
    "st   X+, __tmp_reg__ \n"
    "brne 2b              \n"
    // return dst (unchanged)
    "ret                  \n"
    // 3: len exhausted, terminate the result.
    "3:                   \n"
    "st   X, __zero_reg__ \n"
    : "=r" (len) : "x" (dst), "z" (src)
  );
}


// compare unsigned char s1[max n], s2[max n].
//
// Compares at most len bytes of s1 (read through X) and s2 (read through Z).
// Returns 0 when len runs out or both strings end without a difference.
// Otherwise the low byte of the result is the difference of the first
// mismatching pair (s1 byte minus s2 byte) and the high byte is 0xFF or 0
// according to the borrow of that subtraction.
//
// NOTE(review): len, X and Z are modified although declared as inputs -
// confirm the compiler does not rely on them afterwards.
int strncmp_(const char *s1, const char *s2, size_t len)
{
  register int ret asm("r24");

  __asm__ __volatile__(
    "1:                    \n"
    "subi %A1, 1           \n" // len
    "sbci %B1, 0           \n"
    "brcs 2f               \n"
    "ld   %0, X+           \n" 
    "ld   __tmp_reg__, Z+  \n"
    "sub  %A0, __tmp_reg__ \n" 
    "brne 3f               \n"
    // bytes are equal: stop if that byte was the NUL, else continue.
    "tst  __tmp_reg__      \n"
    "brne 1b               \n"
    "2:                    \n"
    "sub  %A0, %A0         \n" // clear ret and C flag
    "3:                    \n"
    "sbc  %B0, %B0         \n"
    : "=r" (ret) :  "r" (len), "x" (s1), "z" (s2)
  );
  
  return ret;
}

// copy char s2[max n] to s1[n].
//
// Copies at most len bytes from src (read through Z) to dst (written through
// X). When src's NUL is copied before len is used up, the rest of the len
// bytes of dst is filled with NUL bytes. When len runs out first, dst is not
// NUL-terminated.
//
// NOTE(review): the function is declared to return char* but has no return
// statement; callers that use the result presumably rely on dst still being
// in the return register - confirm. The asm modifies len, X and Z although
// they are declared as inputs, and has no "memory" clobber.
char* strncpy_(char *dst, const char *src, size_t len)
{
  __asm__ __volatile__(
    // 1: count down len, then copy one byte; leave when len is exhausted.
    "1:                    \n"
    "subi %A0, 1           \n"
    "sbci %B0, 0           \n"
    "brcs 4f               \n"
    "ld   __tmp_reg__, Z+  \n"
    "st   X+, __tmp_reg__  \n"
    "tst  __tmp_reg__      \n"
    "brne 1b               \n"
    // store null characters up to the end of dst
    // as the glibc manual says: This behavior is rarely useful, but specified by ISO C standard.
    "rjmp 3f               \n"
    "2:                    \n"
    "st   X+, __zero_reg__ \n"
    "3:                    \n"
    "subi %A0, 1           \n"
    "sbci %B0, 0           \n"
    "brcc 2b               \n"
    "4:                    \n"
    : :  "r" (len), "x" (dst), "z" (src)
  );
}


// Find the first byte of s that also occurs in accept.
//
// For every byte of s (read through X) the accept string is scanned from its
// start (through Z). Returns a pointer to the first byte of s found in
// accept, or a null pointer when s's terminating NUL is reached first.
//
// NOTE(review): several things here need confirming before use:
//  - accept is not an asm operand; Z is reloaded from the fixed pair
//    r23:r22, presumably the register the second argument arrives in.
//  - the comparison `cp r0, r24` uses fixed register numbers, presumably
//    __tmp_reg__ and the low byte of ret.
//  - X is modified although declared as an input.
char* strpbrk_(const char *s, const char *accept)
{
  register char* ret asm("r24");

  __asm__ __volatile__(
    "clr  %B0                        \n" // a trick to save 1 word
    // get next symbol from s[]
    "1:                              \n"
    "ld   %0, X+                     \n" // ret, *s++
    "tst  %A0                        \n"
    "breq 3f                         \n"
    // check an occurrence
    "movw ZL, r22                    \n"
    "2:                              \n"
    "ld   __tmp_reg__, Z+            \n"
    "cp   r0, r24                    \n"
    "cpse  __tmp_reg__, __zero_reg__ \n"
    "brne 2b                         \n"
    "brne 1b                         \n" // branch if end of accept[] is reached
    // OK, is found
    "sbiw XL, 1                      \n"
    "movw %A0, XL                    \n"
    "3:                              \n"
    : "=r" (ret) : "x" (s)
  );

  return ret;
}


// find last occurrence of c in char s[].
//
// Reads src through Z up to and including the terminating NUL. Every byte
// equal to the low byte of val (the NUL itself is compared too) records the
// post-incremented Z in ret. ret starts out as 1, so the final decrement
// yields either a pointer to the last match or a null pointer when there was
// no match.
//
// NOTE(review): Z is modified although declared as an input - confirm the
// compiler does not rely on it afterwards.
char* strrchr_(const char *src, int val)
{
  register char* ret asm("r24");

  __asm__ __volatile__(
  "ldi  %A0, 1           \n" // ret, NULL + 1
  "ldi  %B0, 0           \n"
  "1:                    \n"
  "ld   __tmp_reg__, Z+  \n"
  "cp   __tmp_reg__, %A2 \n" //val_lo
  "brne 2f               \n"
  "movw %0, ZL           \n" // ret, remember this character was here
  "2:                    \n"
  "tst  __tmp_reg__      \n"
  "brne 1b               \n"
  "sbiw %0, 1            \n" // ret, undo post-increment
  : "=r" (ret) : "z" (src), "r" (val)
  );

  return ret;
}

// find index of first s1[i] that matches no s2[].
//
// For every byte of s (read through X) the accept string is scanned from its
// start (through Z); scanning of s stops at the first byte that is not found
// in accept or at s's terminating NUL. The result is computed as X - 1 - s.
//
// NOTE(review): several things here need confirming before use:
//  - accept is not an asm operand; Z is reloaded from the fixed pair
//    r23:r22, presumably the register the second argument arrives in.
//  - chs is passed as an input operand while uninitialised and is written by
//    the asm.
//  - the final com/add sequence reads %0 (r25:r24) although ret is declared
//    as output-only; it presumably still holds s at that point.
size_t strspn_(const char *s, const char *accept)
{
  register size_t ret asm("r24");
  register uint8_t chs asm("r21"); // char from s[]
  
  __asm__ __volatile__(
// This realization is compact, but is not very fast: an accept string is not cached.
    // get next symbol from s[]
    "1:                              \n"
    "ld   %2, X+                     \n" // *s++
    "tst  %2                         \n"
    "breq 3f                         \n"
    // check an occurrence
    "movw ZL, r22                    \n"
    "2:                              \n"
    "ld   __tmp_reg__, Z+            \n"
    "cp   __tmp_reg__, %2            \n"
    "cpse  __tmp_reg__, __zero_reg__ \n"
    "brne 2b                         \n"
    "breq 1b                         \n" // branch if chs is present in accept[]
    // end of scanning: chs is not found or end of s[] is reached
    // Return: X - 1 - str == X + ~str
    "3:                              \n"
    "com  %A0                        \n"
    "com  %B0                        \n"
    "add  %A0, XL                    \n"
    "adc  %B0, XH                    \n"
    : "=r" (ret) : "x" (s), "r" (chs)
  );
  
  return ret;
}


// Return a pointer to the first occurrence of s2[] inside s1[], s1 itself when
// s2[] is empty, or NULL when there is no match.
// NOTE(review): s1 is taken from r24 and r22 is used as scratch by number, X/Z
// are modified without being declared, chr1/beg2 are uninitialised inputs that
// the asm writes, and the labels "done", "no_match" and "match" are global
// assembler symbols (they would clash if this asm were emitted twice) --
// confirm these assumptions hold for the build.
char* strstr_(const char *s1, const char *s2)
{
  register char* ret asm("r24");
  register uint8_t chr1 asm("r20"); 
  register uint8_t beg2 asm("r21"); // begin of s2: s2[0]
  
  __asm__ __volatile__(
    "ld   %3, Z+           \n" // beg2 = s2[0]
    "tst  %3               \n" // is str2 empty?
    "breq done             \n" // return original string (req'd by standard)
    "movw r22, ZL          \n" // save: address of second s2 byte
    "0:                    \n"
    "movw XL, r24          \n" // s1
    "1:                    \n"
    "ld   %2, X+           \n" // Find first char
    "cp   %2, %3           \n"
    "cpse %2, __zero_reg__ \n" // end of s1: skip the loop branch
    "brne 1b               \n"
    "brne no_match         \n" // end of s1
    "movw %0, XL           \n" // store return value
    "2:                    \n"
    "ld   __tmp_reg__, Z+  \n" // compare strings
    "tst  __tmp_reg__      \n"
    "breq match            \n" // end of s2
    "ld   %2, X+           \n"
    "cp   %2, __tmp_reg__  \n"
    "cpse %2, __zero_reg__ \n" // break, if end of s1
    "breq 2b               \n"
    "movw ZL, r22          \n" // restore s2+1
    "cpse %2, __zero_reg__ \n" // end of s1 reached: fall into no_match
    "rjmp 0b               \n" // otherwise retry from the saved position in r24
    "no_match:             \n"
    "ldi  %A0, 1           \n" // ret = 1 so the sbiw below yields NULL
    "ldi  %B0, 0           \n"
    "match:                \n"
    "sbiw %0, 1            \n" // restore after post-increment
    "done:                 \n"
    : "=r" (ret) : "z" (s2), "r" (chr1), "r" (beg2)
  );
    
  return ret;
}


// Find the next token in str[] delimited by any character of delim[], keeping
// the scan position in *last between calls (pass str == NULL to continue).
// NOTE(review): this non-void function has no return statement; the asm leaves
// the token pointer in _str (r24), which is presumably relied on as the return
// register -- confirm, since falling off the end of a non-void function is
// undefined behaviour. dch/_dlm/_str are uninitialised register variables
// passed as inputs, "last" is re-read from r20 by number, and X/Z plus memory
// are modified without being declared.
char* strtok_r_(char *str, const char *delim, char **last)
{
  register uint8_t dch asm("r18");
  register char* _dlm asm("r22");
  register char* _str asm("r24");

  __asm__ __volatile__(
    "ld   XL, Z+           \n" // X = *last
    "ld   XH, Z            \n"
    // check str
    "sbiw %3, 0            \n" // str
    "brne 1f               \n"
    "sbiw XL, 0            \n"
    "breq 9f               \n" // end of string
    "movw %3, XL           \n" // continue parsing
    // skip delimiters
    "1:                    \n"
    "movw XL, %3           \n" // p = str
    "2:                    \n"
    "movw %3, XL           \n" // candidate token start
    "ld   __tmp_reg__, X+  \n"
    "tst  __tmp_reg__      \n"
    "brne 3f               \n"
    "movw %3, __tmp_reg__  \n" // <r0, r1>
    "rjmp 8f               \n"
    "3:                    \n"
    "movw ZL, %2           \n" // delim
    "4:                    \n"
    "ld   %1, Z+           \n" // dch
    "tst  %1               \n" // dch
    "breq 5f               \n" // goto find
    "cp   %1, __tmp_reg__  \n" // dch
    "brne 4b               \n"
    "rjmp 2b               \n" // skip 1 byte
    // find new token end
    "5:                    \n" 
    "movw ZL, %2           \n" // delim
    "6:                    \n"
    "ld   %1, Z+           \n" // dch
    "cp   %1, __tmp_reg__  \n" // dch, __tmp_reg__ != 0
    "brne 7f               \n"
    "st   -X, __zero_reg__ \n" // overwrite the delimiter with NUL
    "adiw XL, 1            \n" // X = first byte after the token
    "rjmp 9f               \n"
    "7:                    \n"
    "tst  %1               \n" // dch
    "brne 6b               \n"
    // next str byte
    "ld   __tmp_reg__, X+  \n"
    "tst  __tmp_reg__      \n"
    "brne 5b               \n"
    // stop parsing
    "8:                    \n"
    "movw XL, __tmp_reg__  \n" // <r0,r1>
    // save last pointer
    "9:                    \n"
    "movw ZL, r20          \n" // *last = X
    "st   Z+, XL           \n"
    "st   Z, XH            \n"
    : : "z" (last), "r" (dch), "r" (_dlm), "r" (_str)
  );
}

// Scan position shared by successive _strtok() calls.
static char *p;

// strtok()-style wrapper: forwards to strtok_r_() using the file-level save pointer.
char* _strtok(char *s, const char *delim)
{
  char **savePos = &p;
  return strtok_r_(s, delim, savePos);
}


// compare caseless unsigned char s1[max n], s2[max n].
int strncasecmp_(const char* s1, const char* s2, size_t len)
{
  register uint8_t ret asm("r24");
  register uint8_t tmp asm("r22");
  
  __asm__ __volatile__ (
    "1:                    \n"
    "subi %A2, 1           \n" // if (--len == -1) return 0
    "sbci %B2, 0           \n"
    "brlo 5f               \n"
    "ld   %A3, X+          \n" // *s1++
    "cpi  %A3, 'A'         \n" // if in [A-Z] then tolower()
    "brlt 2f               \n"
    "cpi  %A3, 'Z' + 1     \n"
    "brge 2f               \n"
    "subi %A3, 'A' - 'a'    \n"
    "2:                    \n"
    "ld   %4, Z+           \n" // *s2++
    "cpi  %4, 'A'          \n" // if in [A-Z] then tolower()
    "brlt 3f               \n"
    "cpi  %4, 'Z' + 1      \n"
    "brge 3f               \n"
    "subi %4, 'A' - 'a'    \n"
    "3:                    \n"
    "sub  %A3, %4          \n" // compare
    "cpse %4, __zero_reg__ \n" // break, if end of string
    "breq 1b               \n"
    "4:                    \n"
    "sbc  %B3, %B3         \n" // sign extension
    "ret                   \n"
    "5:                    \n"
    "sub  %A3, %A3         \n" // length limit, return 0
    "rjmp 4b               \n"
    : : "x" (s1), "z" (s2), "r" (len), "r" (ret), "r" (tmp)
  );
}


// Compare two strings ignoring case.
int strcasecmp_(const char *s1, const char *s2)
{
  register uint8_t ret asm("r24");
  register uint8_t tmp asm("r22");
  
  __asm__ __volatile__ (
    "1:                    \n"
    "ld   %A2, X+          \n" // *s1++
    "cpi  %A2, 'A'         \n" // if in [A-Z] then tolower()
    "brlt 2f               \n"
    "cpi  %A2, 'Z' + 1     \n"
    "brge 2f               \n"
    "subi %A2, 'A' - 'a'    \n"
    "2:                    \n"
    "ld   %3, Z+           \n" // *s2++
    "cpi  %3, 'A'          \n" // if in [A-Z] then tolower()
    "brlt 3f               \n"
    "cpi  %3, 'Z' + 1      \n"
    "brge 3f               \n"
    "subi %3, 'A' - 'a'    \n"
    "3:                    \n"
    "sub  %A2, %3          \n" // compare
    "cpse %3, __zero_reg__ \n" // break, if end of string
    "breq 1b               \n"
    "sbc  %B2, %B2         \n" // sign extension
    : : "x" (s1), "z" (s2), "r" (ret), "r" (tmp)
  );
}


// Like strchr() except that if c is not found in s, then it returns a pointer to the null byte at the end of s, rather than NULL.
char* strchrnul_(const char *s, int c)
{
  register char* ret asm("r24");

  __asm__ __volatile__ (
    "1:                             \n"  
    "ld   __tmp_reg__, Z+           \n"
    "cp   __tmp_reg__, %2           \n"
    "cpse __tmp_reg__, __zero_reg__ \n"
    "brne 1b                        \n"
    "sbiw ZL, 1                     \n"  // undo post-increment
    "movw %1, ZL                    \n"
    : : "z" (s), "r" (ret), "r" (c)
  );
}


// Convert a string to lower case.
char* strlwr_(char *s)
{
  register uint8_t temp asm("r22");

  __asm__ __volatile__ (
    "1:   \n"
    "ld   %1, X \n"
    "subi %1, 'A' \n"
    "cpi  %1, 'Z' - 'A' + 1 \n"
    "brlo 2f \n"               // if temp is A..Z, then temp += 'a'-'A'
    "subi %1, 'a' - 'A' \n"    // else restore temp
    "2:   \n"
    "subi %1, -'a' \n"
    "st   X+, %1 \n"
    "brne 1b \n"               // Z for temp
    : : "x" (s), "r" (temp)
  );
}


// Convert a string to upper case.
char *strupr_(char *s)
{
  register uint8_t temp asm("r22");

  __asm__ __volatile__ (
    "1: \n"
    "ld   %1, X \n"
    "subi %1, 'a' \n"
    "cpi  %1, 'z' - 'a' + 1 \n"
    "brlo 2f \n"             // if temp is a..z, then temp += 'A'-'a'
    "subi %1, 'A' - 'a' \n"  // else restore temp
    "2: \n"
    "subi %1, -'A' \n"
    "st   X+, %1 \n"
    "brne 1b \n"             // Z for temp
    : : "x" (s), "r" (temp)
  );
}


// Reverse the order of the string.
char* strrev_(char *s)
{
  register uint8_t rtmp asm("r22");
  register uint8_t ltmp asm("r23");
  
  __asm__ __volatile__ (
    // find end of string
    "1:           \n" 
    "mov  %1, %2  \n" // to obtain right nonzero character
    "ld   %2, Z+  \n"
    "tst  %2      \n"
    "brne 1b      \n"
    "sbiw ZL, 2   \n" // to last nonzero byte
    "rjmp 3f      \n"
    // swap bytes
    "2:           \n"
    "ld   %2, X   \n"
    "st   X+, %1  \n"
    "st   Z, %2   \n"
    "ld   %1, -Z  \n"
    "3:           \n"
    "cp   XL, ZL  \n"
    "cpc  XH, ZH  \n"
    "brlo 2b      \n"
    : : "z" (s), "r" (rtmp), "r" (ltmp)
  );
}


// Returns the number of chars in string pointed to by src, not including the terminating '\0' char, but at most len.
size_t strnlen_(const char* src, size_t len)
{
  register size_t ret asm("r24");

  __asm__ __volatile__ (
    "1: \n"
    "subi %A1, 1 \n"
    "sbci %B1, 0 \n"
    "ld  __tmp_reg__, Z+ \n"
    "cpse  __tmp_reg__, __zero_reg__ \n"
    "brcc 1b \n"
    // Z points one character past the terminating NUL
    // return Z - 1 - src = (-1 - src) + Z = ~src + Z
    "com  %A2 \n"
    "com  %B2 \n"
    "add  %A2, ZL \n"
    "adc  %B2, ZH \n"
    : : "z" (src), "r" (len), "r" (ret)
  );
}


// Parse a string into tokens: terminate the token at *sp at the first delim[]
// character, advance *sp past it (or set *sp to NULL when the string ends),
// and hand back the original *sp.
// NOTE(review): this non-void function has no return statement; the asm copies
// the original *sp into r24, which is presumably relied on as the return
// register -- confirm, since falling off the end of a non-void function is
// undefined behaviour. delim is read from r22 and sp is re-read from r24 by
// number, str/chr are uninitialised register variables passed as inputs, and
// X/Z plus memory are modified without being declared.
char* strsep_(char **sp, const char *delim)
{
  register char* str asm("r20");
  register uint8_t chr asm("r19");
  
  __asm__ __volatile__ (
    // check a NULL pointer
    "ld   XL, Z            \n" // str address
    "ldd  XH, Z + 1        \n"
    "movw %1, XL           \n"// save for return
    "adiw XL, 0            \n"
    "breq 5f               \n" // return NULL
    // get a symbol from str
    "1:                    \n"
    "ld   %2, X+           \n"
    // scan delim[]
    "movw ZL, r22          \n" // delim
    "2:                    \n"
    "ld   __tmp_reg__, Z+  \n"
    "cp   __tmp_reg__, %2  \n"
    "cpse  __tmp_reg__, __zero_reg__ \n"
    "brne 2b \n" // if the symbol does not match && delim[] has not ended
    "brne 1b \n" // if symbol is absent in delim[] && not a zero
    // chr was found in delim[] (possibly it is the terminating zero of str)
    "tst  __tmp_reg__      \n" // the same, as chr
    "brne 3f               \n"
    "movw XL, __tmp_reg__  \n" // end of string: next position = <r0, r1> = NULL
    "rjmp 4f               \n"
    // OK, a delimiter symbol was found
    "3:                    \n"
    "st   -X, __zero_reg__ \n" // replace by '\0'
    "adiw XL, 1            \n" // address of next token
    // save result to *sp and return
    "4:                    \n"
    "movw ZL, r24          \n" // Z = sp (taken from r24)
    "st   Z, XL            \n"
    "std  Z+1, XH          \n"
    "5:                    \n"
    "movw r24, %1          \n" // return original address
    : : "z" (sp), "r" (str), "r" (chr)
  );
}


// Allocate memory and copy string into it, including the terminating null character.
char* strdup_(const char *s)
{
  __asm__ __volatile__ (
    "1: \n"
    "ld __tmp_reg__, Z+ \n"
    "tst __tmp_reg__ \n"
    "brne 1b \n"
    "com r24 \n"
    "com r25 \n"
    "add r24, r30 \n"
    "adc r25, r31 \n"
    "ldi r24, 1 \n"
    "ldi r25, 0 \n"
    "call malloc \n"
    "sbiw r24, 0 \n"
    "breq 3f \n"
    "movw %0, %1 \n"
    "movw r26, r24 \n"
    "2: \n"
    "ld __tmp_reg__, Z+ \n"
    "st X+, __tmp_reg__ \n"
    "tst __tmp_reg__ \n"
    "brne 2b \n"
    "3: \n"
    : : "z" (s), "y" (s)
  );
}


// Copy src to string dst of size siz.
size_t strlcpy_(char *dst, const char *src, size_t siz)
{
  register size_t ret asm("r24");

  __asm__ __volatile__ (
    // copy loop
    "1:   \n"
    "subi %A2, 1 \n"
    "sbci %B2, 0 \n"
    "brcs 4f \n"     // is possible with siz == 0
    "breq 3f \n"     // --> siz chars copied
    "ld  __tmp_reg__, Z+ \n"
    "st  X+, __tmp_reg__ \n"
    "tst __tmp_reg__ \n"
    "brne 1b \n"
    // calculate result (Z - 1 - src) and return
    "2: \n"
    "sub  ZL, %A1 \n"
    "sbc  ZH, %B1 \n"
    "sbiw ZL, 1 \n"
    "movw %A3, ZL \n"
    "ret \n"
    // terminate dst
    "3: \n"
    "st  X, __zero_reg__ \n"
    // find src end
    "4:   \n"
    "ld  __tmp_reg__, Z+ \n"
    "tst __tmp_reg__ \n"
    "brne 4b \n"
    "rjmp  2b \n"
    : : "x" (dst), "z" (src), "r" (siz), "r" ("ret")
  );
}



// Append src to string dst of size siz.
size_t strlcat_(char *dst, const char *src, size_t siz)
{
  register size_t ret asm("r24");

  __asm__ __volatile__ (
    // find end of dst: X := dst + strlen(dsr)
    "1:                   \n"
    "subi %A2, 1          \n"
    "sbci %B2, 0          \n"
    "brlo .Len            \n" // siz <= strlen(dst)
    "ld  __tmp_reg__, X+  \n"
    "tst __tmp_reg__      \n"
    "brne 1b              \n"
    "sbiw XL, 1           \n"
    "rjmp 3f              \n"
    // copy loop
    "2:                   \n"
    "ld  __tmp_reg__, Z+  \n"
    "st  X+, __tmp_reg__  \n"
    "tst __tmp_reg__      \n"
    "breq .Ldd            \n"
    "3:                   \n"
    "subi %A2, 1          \n"
    "sbci %B2, 2          \n"
    "brsh 2b              \n"
    "st   X, __zero_reg__ \n"
    // return (d - dst + strlen(s))
    "movw r22, ZL         \n" // update for strlen(s) calculation
    ".Len:                \n"
    "ld  __tmp_reg__, Z+  \n" // find end of src
    "tst __tmp_reg__      \n"
    "brne .Len            \n"
    "sub  ZL, %A2         \n" // Z := strlen(s) + 1
    "sbc  ZH, %B2         \n"
    "add  XL, ZL          \n" // d += strlen(s) + 1
    "adc  XH, ZH          \n"
    ".Ldd:                \n"
    "sec                  \n" // d -= dst + 1
    "sbc  XL, %A3         \n"
    "sbc  XH, %B3         \n"
    "movw %3, XL          \n" // return value
    : : "x" (dst), "z" (src), "r" (siz), "r" ("ret")
  );
}

#ifdef __cplusplus
}
#endif
Posted in Uncategorized | Leave a comment

STM32F446RE Nucleo RTC Timestamp

This program demonstrates the RTC timestamp feature of the STM32F4 family of MCUs. The timestamp is activated by one of two external pins (PA0 or PC13 on the STM32F446). Since the PC13 pin is connected to the user button on the Nucleo board, we use it in this demonstration to activate the timestamp. The timestamp feature can be used in either polling or interrupt mode. This demonstration utilizes the interrupt and callback. The ISR in this code simply flags a timestamp overflow by turning on the LED.

The code should be self-explanatory. It is compiled inside the Arduino IDE using the official STMicroelectronics 2.0 core.

// RTC timestamp demo.
// 1. timestamp occurs when user button is pressed.
// 2. include a sketch level "build_opt.h" file with "-DHAL_RTC_MODULE_ONLY"

// RTC prescalers when clocked from the LSI:
// (0x7F + 1) * (0x00F9 + 1) = 128 * 250 = 32000.
// NOTE(review): presumably chosen to give a 1 Hz calendar tick from a ~32 kHz
// LSI -- confirm against the datasheet.
#define RTC_ASYNCH_PREDIV    0x7F
#define RTC_SYNCH_PREDIV     0x00F9
// Alternative prescalers for the LSE: (0x7F + 1) * (0x00FF + 1) = 32768.
//#define RTC_ASYNCH_PREDIV  0x7F
//#define RTC_SYNCH_PREDIV   0x00FF

// RTC handle shared by the init code, the interrupt handler and loop().
RTC_HandleTypeDef RtcHandle;
// Most recent timestamp, filled in by HAL_RTCEx_TimeStampEventCallback().
RTC_TimeTypeDef sTimeStamp;
RTC_DateTypeDef sDateStamp;

// Set by buttonISR(); cleared by loop() once the timestamp has been printed.
volatile bool stamp {false};

static void RTC_TimeStampConfig(void) 
{
  RTC_DateTypeDef sDate;
  RTC_TimeTypeDef sTime;

  // RTC TimeStamp generation: TimeStamp Rising Edge on PC13 Pin 
  HAL_RTCEx_SetTimeStamp_IT(&RtcHandle, RTC_TIMESTAMPEDGE_RISING, RTC_TIMESTAMPPIN_PC13);
  
  // Set a date & time.
  sTime.Hours = 0x1;
  sTime.Minutes = 0x2;
  sTime.Seconds = 0x3;
  sTime.DayLightSaving = RTC_DAYLIGHTSAVING_NONE;
  sTime.StoreOperation = RTC_STOREOPERATION_RESET;
  if (HAL_RTC_SetTime(&RtcHandle, &sTime, RTC_FORMAT_BCD) != HAL_OK)
    while (1); 
  sDate.WeekDay = RTC_WEEKDAY_MONDAY;
  sDate.Month = RTC_MONTH_SEPTEMBER;
  sDate.Date = 0x14;
  sDate.Year = 0x0;
  if (HAL_RTC_SetDate(&RtcHandle, &sDate, RTC_FORMAT_BCD) != HAL_OK)
    while (1); 
}

// Bring up the RTC: start the LSI, route it to the RTC, enable the
// tamper/timestamp interrupt, program the prescalers and finally configure the
// timestamp and calendar. Hangs in an infinite loop if any HAL call fails.
void RTC_Init(void)
{
  // Zero-initialise so members that are not assigned below are well defined.
  RCC_OscInitTypeDef RCC_OscInitStruct = {0};
  RCC_PeriphCLKInitTypeDef RCC_RTCPeriClkInit = {0};

  // 1. Turn on the LSI
  RCC_OscInitStruct.OscillatorType = RCC_OSCILLATORTYPE_LSI;
  // With OscillatorType == LSI the HAL acts on LSIState, not LSEState.
  RCC_OscInitStruct.LSIState = RCC_LSI_ON;
  RCC_OscInitStruct.PLL.PLLState = RCC_PLL_NONE;
  if (HAL_RCC_OscConfig(&RCC_OscInitStruct) != HAL_OK)
    while (1);
  // 2. Select LSI as RTCCLK
  RCC_RTCPeriClkInit.PeriphClockSelection = RCC_PERIPHCLK_RTC;
  RCC_RTCPeriClkInit.RTCClockSelection = RCC_RTCCLKSOURCE_LSI;
  if (HAL_RCCEx_PeriphCLKConfig(&RCC_RTCPeriClkInit) != HAL_OK)
    while (1);
  
  __HAL_RCC_RTC_ENABLE(); 
  // Configure the NVIC for RTC TimeStamp. 
  HAL_NVIC_SetPriority(TAMP_STAMP_IRQn, 0x0F, 0);
  HAL_NVIC_EnableIRQ(TAMP_STAMP_IRQn);
  
  // Configure RTC prescaler and RTC data registers 
  // - Hour Format    = Format 12
  // - Asynch Prediv  = Value according to source clock
  // - Synch Prediv   = Value according to source clock
  // - OutPut         = Output Disable
  // - OutPutPolarity = High Polarity
  // - OutPutType     = Open Drain 
  RtcHandle.Instance = RTC;
  RtcHandle.Init.HourFormat     = RTC_HOURFORMAT_12;
  RtcHandle.Init.AsynchPrediv   = RTC_ASYNCH_PREDIV;
  RtcHandle.Init.SynchPrediv    = RTC_SYNCH_PREDIV;
  RtcHandle.Init.OutPut         = RTC_OUTPUT_DISABLE;
  RtcHandle.Init.OutPutPolarity = RTC_OUTPUT_POLARITY_HIGH;
  RtcHandle.Init.OutPutType     = RTC_OUTPUT_TYPE_OPENDRAIN;
  // Reset the handle state once, immediately before HAL_RTC_Init().
  __HAL_RTC_RESET_HANDLE_STATE(&RtcHandle);
  if (HAL_RTC_Init(&RtcHandle) != HAL_OK)
    while (1);
     
  RTC_TimeStampConfig();
}

// Button interrupt handler (attached in setup()): flags that a timestamp
// should be printed by loop().
void buttonISR()
{
  stamp = true;
}

// HAL timestamp-event callback: copy the captured time/date (binary format)
// into the global stamp variables. The global handle is used; hrtc is ignored.
extern "C" void HAL_RTCEx_TimeStampEventCallback(RTC_HandleTypeDef *hrtc) 
{
  (void)hrtc;

  RTC_TimeTypeDef *capturedTime = &sTimeStamp;
  RTC_DateTypeDef *capturedDate = &sDateStamp;
  HAL_RTCEx_GetTimeStamp(&RtcHandle, capturedTime, capturedDate, RTC_FORMAT_BIN);
}

// RTC tamper/timestamp interrupt entry point.
// Turns the PA5 LED off, turns it back on (and clears the flag) when a
// timestamp overflow is pending, then lets the HAL service the interrupt.
extern "C" void TAMP_STAMP_IRQHandler(void) 
{ 
  HAL_GPIO_WritePin(GPIOA, GPIO_PIN_5, GPIO_PIN_RESET);

  const bool overflowed = (RTC->ISR & RTC_FLAG_TSOVF) != 0;
  if (overflowed)
  {
    // Signal the overflow on the LED before acknowledging it.
    HAL_GPIO_WritePin(GPIOA, GPIO_PIN_5, GPIO_PIN_SET);
    CLEAR_BIT(RTC->ISR, RTC_FLAG_TSOVF);
  }

  HAL_RTCEx_TamperTimeStampIRQHandler(&RtcHandle); 
}

// Arduino entry point: open the serial port, start the RTC, set up the LED
// pin and the button interrupt, then print the banner.
void setup()
{
  Serial.begin(9600);
  while (!Serial)
  {
    // wait for the serial port
  }

  RTC_Init();

  pinMode(LED_BUILTIN, OUTPUT);

  // A rising edge on PC13 (user button) runs buttonISR().
  const auto buttonIrq = digitalPinToInterrupt(PC13);
  attachInterrupt(buttonIrq, buttonISR, RISING);

  Serial.println("RTC Timestamp Demo");
}

void loop() 
{
    RTC_TimeTypeDef sTime;
    RTC_DateTypeDef sDate;
    char s[16] = { 0 };

    HAL_RTC_GetTime(&RtcHandle, &sTime, RTC_FORMAT_BIN);
    HAL_RTC_GetDate(&RtcHandle, &sDate, RTC_FORMAT_BIN);
    sprintf(s, "%02d:%02d", sTime.Minutes, sTime.Seconds); 
    Serial.print(s);
    
    if (stamp)
    {
      sprintf(s, " [%02d:%02d]", sTimeStamp.Minutes, sTimeStamp.Seconds);
      Serial.println(s); 
      stamp = false;
    }
    else
      Serial.println();
      
    HAL_Delay(1000);
}

Posted in Uncategorized | Leave a comment

STM32F405 Feather GPS Logger

Demonstrates UART/DMA Rx, FreeRTOS, specific sentence selection and SD Card logging.

Compiled with STMicroelectronics core using the Arduino IDE. Note, all references to “Serial” in the STM32 SD library SD.cpp file were commented out since this program is not using the builtin serial functionality. Program listing below:

// Adafruit STM32F405 Feather GPS Logger.
// UART DMA RX undetermined length data demonstration.
// build_opt.h: -DHAL_UART_MODULE_ENABLED -DHAL_UART_MODULE_ONLY
// USART: disabled, USB support: none.
//
// Adaptation of technique described here:
// https://stm32f4-discovery.net/2017/07/stm32-tutorial-efficiently-receive-uart-data-using-dma/
// https://community.st.com/s/global-search/efficiently%20use%20dma%20with%20uart%20rx%20stm32
//
// Comment out all reference to "Serial" inside STM32SD library file SD.cpp file.
//
// Use TeraTerm to connect to board at 9600 baud to monitor.
// USART1 RX is on SDA pin, PB7 (GPS).
// USART1 TX is on SCL pin, PB6 (PC).
// SD detect pin is on PB12 (D32).

#include <STM32FreeRTOS.h>
#include "stm32f4xx.h"
#include "stm32f4xx_hal.h"
#include "stm32f4xx_ll_dma.h"
#include "stm32f4xx_ll_usart.h"
#include "ctype.h"
#include <atomic>
// All reference to Serial commented out in STM32SD.h/SD.cpp files of STM32SD library.
#include <STM32SD.h>

// Size in bytes of both the DMA receive buffer and the sentence buffer.
#define BUFFER_SIZE 128

// UART and DMA handles.
UART_HandleTypeDef hUART1;
DMA_HandleTypeDef hDMA2;
 
// UART DMA data buffer (filled by DMA2 stream 2 in circular mode).
uint8_t dmaBuffer[BUFFER_SIZE];
// GPS sentence buffer (one sentence is assembled here at a time).
uint8_t sentenceBuffer[BUFFER_SIZE];

// Number of pending "data needs processing" events: incremented by the
// interrupt handlers, decremented by checkTask().
// Overkill? Because ARM Architecture Reference Manual says any aligned 32-bit memory access is atomic.
// https://developer.arm.com/documentation/ddi0403/eb/
std::atomic<uint32_t> count(0);
//uint32_t volatile __attribute__((aligned(4))) count{0};

// File object refers to gps text file.
File file;

void errorHandler(uint32_t d) 
{
  while(1) 
  {
    HAL_GPIO_TogglePin(GPIOC, GPIO_PIN_1);
    HAL_Delay(d);  
  }
}

// Configure USART1 (9600 baud, 8 data bits, no parity, 1 stop bit) with its
// IDLE-line interrupt, and DMA2 stream 2 / channel 4 as a circular
// peripheral-to-memory transfer from USART1->DR into dmaBuffer with
// half-transfer and transfer-complete interrupts.
// Calls errorHandler(1000), which never returns, if a HAL init call fails.
void UART_DMA_Init(void)
{
  // USART1 init.
  __HAL_RCC_USART1_CLK_ENABLE();
  hUART1.Instance = USART1;
  hUART1.Init.BaudRate = 9600;
  hUART1.Init.WordLength = UART_WORDLENGTH_8B;
  hUART1.Init.StopBits = UART_STOPBITS_1;
  hUART1.Init.Parity = UART_PARITY_NONE;
  hUART1.Init.Mode = UART_MODE_TX_RX;
  hUART1.Init.HwFlowCtl = UART_HWCONTROL_NONE;
  hUART1.Init.OverSampling = UART_OVERSAMPLING_16;
  if (HAL_UART_Init(&hUART1) != HAL_OK)
    errorHandler(1000);
  HAL_NVIC_SetPriority(USART1_IRQn, 0, 0);
  HAL_NVIC_EnableIRQ(USART1_IRQn);
  // IDLE interrupt: handled by USART1_IRQHandler().
  LL_USART_EnableIT_IDLE(USART1);

  // DMA2 USART1 RX init.
  __HAL_RCC_DMA2_CLK_ENABLE();
  hDMA2.Instance = DMA2_Stream2;
  hDMA2.Init.Channel = DMA_CHANNEL_4;
  hDMA2.Init.Direction = DMA_PERIPH_TO_MEMORY;
  hDMA2.Init.PeriphInc = DMA_PINC_DISABLE;
  hDMA2.Init.MemInc = DMA_MINC_ENABLE;
  hDMA2.Init.PeriphDataAlignment = DMA_PDATAALIGN_BYTE;
  hDMA2.Init.MemDataAlignment = DMA_MDATAALIGN_BYTE;
  hDMA2.Init.Mode = DMA_CIRCULAR;
  hDMA2.Init.Priority = DMA_PRIORITY_VERY_HIGH;
  hDMA2.Init.FIFOMode = DMA_FIFOMODE_DISABLE;
  if (HAL_DMA_Init(&hDMA2) != HAL_OK)
      errorHandler(1000);
  __HAL_LINKDMA(&hUART1, hdmarx, hDMA2);

  // Source = USART1 data register, destination = dmaBuffer, BUFFER_SIZE items.
  LL_DMA_SetPeriphAddress(DMA2, LL_DMA_STREAM_2, (uint32_t)&USART1->DR);
  LL_DMA_SetMemoryAddress(DMA2, LL_DMA_STREAM_2, (uint32_t)dmaBuffer);
  LL_DMA_SetDataLength(DMA2, LL_DMA_STREAM_2, (uint32_t)BUFFER_SIZE);

  // Half-transfer and transfer-complete interrupts: handled by DMA2_Stream2_IRQHandler().
  LL_DMA_EnableIT_HT(DMA2, LL_DMA_STREAM_2);
  LL_DMA_EnableIT_TC(DMA2, LL_DMA_STREAM_2);
  
  HAL_NVIC_SetPriority(DMA2_Stream2_IRQn, 0, 0);
  HAL_NVIC_EnableIRQ(DMA2_Stream2_IRQn);
  // Start the stream, then let USART1 raise DMA requests on reception.
  LL_DMA_EnableStream(DMA2, LL_DMA_STREAM_2);
  LL_USART_EnableDMAReq_RX(USART1);
}

void GPIO_Init(void)
{
  GPIO_InitTypeDef GPIO_InitStruct;

  __HAL_RCC_GPIOB_CLK_ENABLE();
  __HAL_RCC_GPIOC_CLK_ENABLE();

  // USART1 GPIOs: PB6->TX, PB7->RX
  GPIO_InitStruct.Pin = GPIO_PIN_6 | GPIO_PIN_7;
  GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
  GPIO_InitStruct.Pull = GPIO_PULLUP;
  GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
  GPIO_InitStruct.Alternate = GPIO_AF7_USART1;
  HAL_GPIO_Init(GPIOB, &GPIO_InitStruct);

  // LED GPIO pin.
  HAL_GPIO_WritePin(GPIOC, GPIO_PIN_1, GPIO_PIN_RESET);
  GPIO_InitStruct.Pin = GPIO_PIN_1;
  GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP;
  GPIO_InitStruct.Pull = GPIO_NOPULL;
  GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
  HAL_GPIO_Init(GPIOC, &GPIO_InitStruct);

  // Neopixel LED GPIO pin reset.
  HAL_GPIO_WritePin(GPIOC, GPIO_PIN_0, GPIO_PIN_RESET);
  GPIO_InitStruct.Pin = GPIO_PIN_0 | GPIO_PIN_2 | GPIO_PIN_3 | GPIO_PIN_4 | GPIO_PIN_5 | GPIO_PIN_6 | GPIO_PIN_7 | GPIO_PIN_13;
  GPIO_InitStruct.Mode = GPIO_MODE_ANALOG;
  GPIO_InitStruct.Pull = GPIO_NOPULL;
  HAL_GPIO_Init(GPIOC, &GPIO_InitStruct);
}

// Print RMC sentences.
void printRMCSentence(const size_t len)
{
  // Null terminate gps sentence.
  sentenceBuffer[len] = 0;

  if (strstr((const char *)sentenceBuffer, "$GPRMC") != NULL)
  {
    for (uint8_t i=0; i<len; i++)
    {
      LL_USART_TransmitData8(USART1, sentenceBuffer[i]);
      while (!LL_USART_IsActiveFlag_TXE(USART1));
    }
  
    // cr/lf.
    LL_USART_TransmitData8(USART1, '\r');
    while (!LL_USART_IsActiveFlag_TXE(USART1));
    LL_USART_TransmitData8(USART1, '\n');
    while (!LL_USART_IsActiveFlag_TXE(USART1));
  
    // Wait until xfer complete.
    while (!LL_USART_IsActiveFlag_TC(USART1));

    // Open file, write data then close file.
    file = SD.open("gps.txt", FILE_WRITE);
    if (file)
    {
      file.print((const char *)sentenceBuffer);
      file.close();
    }
  }
}

void constructGPSSentence(const void* data, size_t len)
{
  static size_t index = 0;
  const uint8_t* d = (const uint8_t*)data;
  
  while (len--)
  {
    if (*d == '\n')
      printRMCSentence(index);

    if (*d == '$' || index >= BUFFER_SIZE)
      index = 0;

    // Eat cr/lf (0x0d/0x0a).
    if (*d == '\n' || *d == '\r')
      d++;
    else
      sentenceBuffer[index++] = *d++;
  }
}

// Pull newly received bytes out of the circular DMA buffer and pass them, in
// arrival order, to constructGPSSentence().
void checkData(void)
{
  static size_t prevPos;

  // Write position of the DMA = buffer size minus the items still to transfer.
  const size_t curPos = BUFFER_SIZE - LL_DMA_GetDataLength(DMA2, LL_DMA_STREAM_2);

  if (curPos > prevPos)
  {
    // New data is one contiguous run.
    constructGPSSentence(&dmaBuffer[prevPos], curPos - prevPos);
  }
  else if (curPos < prevPos)
  {
    // The DMA wrapped: first the tail of the buffer ...
    constructGPSSentence(&dmaBuffer[prevPos], BUFFER_SIZE - prevPos);
    // ... then whatever has arrived at the start.
    if (curPos != 0)
      constructGPSSentence(&dmaBuffer[0], curPos);
  }

  // Remember where we stopped, folding the end of the buffer back to 0.
  prevPos = (curPos == BUFFER_SIZE) ? 0 : curPos;
}

// DMA2 stream 2 (USART1 RX) interrupt: acknowledge half-transfer and
// transfer-complete events and count each one as work for checkTask().
extern "C" void DMA2_Stream2_IRQHandler(void)
{
  // Half-transfer event.
  if (LL_DMA_IsEnabledIT_HT(DMA2, LL_DMA_STREAM_2))
  {
    if (LL_DMA_IsActiveFlag_HT2(DMA2))
    {
      LL_DMA_ClearFlag_HT2(DMA2);
      count.fetch_add(1);
    }
  }

  // Transfer-complete event.
  if (LL_DMA_IsEnabledIT_TC(DMA2, LL_DMA_STREAM_2))
  {
    if (LL_DMA_IsActiveFlag_TC2(DMA2))
    {
      LL_DMA_ClearFlag_TC2(DMA2);      
      count.fetch_add(1);
    }
  }
}

// USART1 interrupt: on an IDLE-line event, acknowledge it and count it as work
// for checkTask().
extern "C" void USART1_IRQHandler(void)
{
  if (LL_USART_IsEnabledIT_IDLE(USART1))
  {
    if (LL_USART_IsActiveFlag_IDLE(USART1))
    {
      LL_USART_ClearFlag_IDLE(USART1);
      count.fetch_add(1);
    }
  }
}

// FreeRTOS task: initialise the UART/DMA, send the banner, then forever
// consume the events counted by the interrupt handlers, one checkData() call
// per event.
void checkTask(void *arg) 
{
  UNUSED(arg);

  UART_DMA_Init();
  __HAL_UART_FLUSH_DRREGISTER(&hUART1);
  HAL_UART_Transmit(&hUART1, (uint8_t *)"USART/DMA FreeRTOS GPS Demo\n\r", 29, 100);

  for (;;)
  {
    if (count.load() == 0)
      continue;

    count.fetch_sub(1);
    checkData();
  }
}

// Arduino entry point: set up the pins, mount the SD card (fast LED blink on
// failure), then create the processing task and start the scheduler.
void setup()
{
  GPIO_Init();

  const bool cardReady = SD.begin();
  if (!cardReady)
  {
    errorHandler(100);
  }

  xTaskCreate(checkTask, NULL, configMINIMAL_STACK_SIZE, NULL, 1, NULL);
  vTaskStartScheduler();
}

// Intentionally empty: setup() starts the scheduler and checkTask() does the work.
void loop() { }
Posted in Uncategorized | Leave a comment

Receiving Data of Indeterminate Length via STM32F4 UART DMA

Adafruit STM32F405 Feather Demonstration

This technique is for applications where the length of a data stream is not known before it is received via UART DMA. It uses the Arduino IDE with the STMicroelectronics 2.0 core.

Here is a picture of the Adafruit STM32F405 Feather connected to a GPS receiver and using the following code to simply repeat the GPS sentences to a terminal program running on a PC.

Terminal output:

// Adafruit STM32F405 Feather
// UART DMA RX undetermined length data demonstration.
//
// Adaptation of technique described here:
// https://stm32f4-discovery.net/2017/07/stm32-tutorial-efficiently-receive-uart-data-using-dma/
// https://community.st.com/s/global-search/efficiently%20use%20dma%20with%20uart%20rx%20stm32
//
// Use TeraTerm to connect to board at 9600 baud.
// USART1 RX is on SDA pin, PB7.
// USART1 TX is on SCL pin, PB6.
// Observe chars are repeated with case change.
//
#include "stm32f4xx.h"
#include "stm32f4xx_hal.h"
#include "stm32f4xx_ll_dma.h"
#include "stm32f4xx_ll_usart.h"
#include "ctype.h"

// UART and DMA handles.
UART_HandleTypeDef hUART1;
DMA_HandleTypeDef hDMA2;
// Buffer of unprocessed UART DMA data (filled by DMA2 stream 2 in circular mode).
static uint8_t dmaBuffer[16];

// Number of elements in dmaBuffer.
#define BUFFER_SIZE ( sizeof(dmaBuffer) / sizeof(dmaBuffer[0]) )

// Fatal-error trap: toggle the PC1 LED every 250 ms forever. Never returns.
void errorHandler(void)
{
  for (;;)
  {
    HAL_GPIO_TogglePin(GPIOC, GPIO_PIN_1);
    HAL_Delay(250);  
  }
}

// Configure USART1 (9600 baud, 8 data bits, no parity, 1 stop bit) with its
// IDLE-line interrupt, and DMA2 stream 2 / channel 4 as a circular
// peripheral-to-memory transfer from USART1->DR into dmaBuffer with
// half-transfer and transfer-complete interrupts.
// Calls errorHandler(), which never returns, if a HAL init call fails.
void UART_DMA_Init(void)
{
  // USART1 init.
  __HAL_RCC_USART1_CLK_ENABLE();
  hUART1.Instance = USART1;
  hUART1.Init.BaudRate = 9600;
  hUART1.Init.WordLength = UART_WORDLENGTH_8B;
  hUART1.Init.StopBits = UART_STOPBITS_1;
  hUART1.Init.Parity = UART_PARITY_NONE;
  hUART1.Init.Mode = UART_MODE_TX_RX;
  hUART1.Init.HwFlowCtl = UART_HWCONTROL_NONE;
  hUART1.Init.OverSampling = UART_OVERSAMPLING_16;
  if (HAL_UART_Init(&hUART1) != HAL_OK)
    errorHandler();
  HAL_NVIC_SetPriority(USART1_IRQn, 0, 0);
  HAL_NVIC_EnableIRQ(USART1_IRQn);
  // IDLE interrupt: handled by USART1_IRQHandler().
  LL_USART_EnableIT_IDLE(USART1);

  // DMA2 USART1 RX init.
  __HAL_RCC_DMA2_CLK_ENABLE();
  hDMA2.Instance = DMA2_Stream2;
  hDMA2.Init.Channel = DMA_CHANNEL_4;
  hDMA2.Init.Direction = DMA_PERIPH_TO_MEMORY;
  hDMA2.Init.PeriphInc = DMA_PINC_DISABLE;
  hDMA2.Init.MemInc = DMA_MINC_ENABLE;
  hDMA2.Init.PeriphDataAlignment = DMA_PDATAALIGN_BYTE;
  hDMA2.Init.MemDataAlignment = DMA_MDATAALIGN_BYTE;
  hDMA2.Init.Mode = DMA_CIRCULAR;
  hDMA2.Init.Priority = DMA_PRIORITY_VERY_HIGH;
  hDMA2.Init.FIFOMode = DMA_FIFOMODE_DISABLE;
  if (HAL_DMA_Init(&hDMA2) != HAL_OK)
      errorHandler();
  __HAL_LINKDMA(&hUART1, hdmarx, hDMA2);

  // Source = USART1 data register, destination = dmaBuffer, BUFFER_SIZE items.
  LL_DMA_SetPeriphAddress(DMA2, LL_DMA_STREAM_2, (uint32_t)&USART1->DR);
  LL_DMA_SetMemoryAddress(DMA2, LL_DMA_STREAM_2, (uint32_t)dmaBuffer);
  LL_DMA_SetDataLength(DMA2, LL_DMA_STREAM_2, (uint32_t)BUFFER_SIZE);

  // Half-transfer and transfer-complete interrupts: handled by DMA2_Stream2_IRQHandler().
  LL_DMA_EnableIT_HT(DMA2, LL_DMA_STREAM_2);
  LL_DMA_EnableIT_TC(DMA2, LL_DMA_STREAM_2);
  
  HAL_NVIC_SetPriority(DMA2_Stream2_IRQn, 0, 0);
  HAL_NVIC_EnableIRQ(DMA2_Stream2_IRQn);
  // Start the stream, then let USART1 raise DMA requests on reception.
  LL_DMA_EnableStream(DMA2, LL_DMA_STREAM_2);
  LL_USART_EnableDMAReq_RX(USART1);
}

void GPIO_Init(void)
{
  GPIO_InitTypeDef GPIO_InitStruct;

  __HAL_RCC_GPIOB_CLK_ENABLE();
  __HAL_RCC_GPIOC_CLK_ENABLE();

  // USART1 GPIOs: PB6->TX, PB7->RX
  GPIO_InitStruct.Pin = GPIO_PIN_6 | GPIO_PIN_7;
  GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
  GPIO_InitStruct.Pull = GPIO_PULLUP;
  GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
  GPIO_InitStruct.Alternate = GPIO_AF7_USART1;
  HAL_GPIO_Init(GPIOB, &GPIO_InitStruct);

  // LED GPIO pin.
  HAL_GPIO_WritePin(GPIOC, GPIO_PIN_1, GPIO_PIN_RESET);
  GPIO_InitStruct.Pin = GPIO_PIN_1;
  GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP;
  GPIO_InitStruct.Pull = GPIO_NOPULL;
  GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
  HAL_GPIO_Init(GPIOC, &GPIO_InitStruct);

  // Neopixel LED GPIO pin reset.
  HAL_GPIO_WritePin(GPIOC, GPIO_PIN_0, GPIO_PIN_RESET);
}

// Process received data.
void processData(const void* data, size_t len)
{
  const uint8_t* d = (const uint8_t*)data;

  while (len--) 
  {
    LL_USART_TransmitData8(USART1, tolower(*d++));
    while (!LL_USART_IsActiveFlag_TXE(USART1));
  }
  while (!LL_USART_IsActiveFlag_TC(USART1));
}

// Pull newly received bytes out of the circular DMA buffer and pass them, in
// arrival order, to processData().
void checkData(void)
{
  static size_t prevPos;

  // Write position of the DMA = buffer size minus the items still to transfer.
  const size_t curPos = BUFFER_SIZE - LL_DMA_GetDataLength(DMA2, LL_DMA_STREAM_2);

  if (curPos > prevPos)
  {
    // New data is one contiguous run.
    processData(&dmaBuffer[prevPos], curPos - prevPos);
  }
  else if (curPos < prevPos)
  {
    // The DMA wrapped: first the tail of the buffer ...
    processData(&dmaBuffer[prevPos], BUFFER_SIZE - prevPos);
    // ... then whatever has arrived at the start.
    if (curPos != 0)
      processData(&dmaBuffer[0], curPos);
  }

  // Remember where we stopped, folding the end of the buffer back to 0.
  prevPos = (curPos == BUFFER_SIZE) ? 0 : curPos;
}

// Rx DMA stream interrupt: services the half-transfer (HT) and
// transfer-complete (TC) events of DMA2 stream 2. Each event is handled only
// when its interrupt is enabled and its flag is set; the flag is cleared
// before checkData() is called to pick up the newly received bytes.
extern "C" void DMA2_Stream2_IRQHandler(void)
{
  if (LL_DMA_IsEnabledIT_HT(DMA2, LL_DMA_STREAM_2) && LL_DMA_IsActiveFlag_HT2(DMA2))
  {
    // Clear half-transfer complete flag.
    LL_DMA_ClearFlag_HT2(DMA2);
    // Check for data to process.
    checkData();
  }

  if (LL_DMA_IsEnabledIT_TC(DMA2, LL_DMA_STREAM_2) && LL_DMA_IsActiveFlag_TC2(DMA2))
  {
    // Clear transfer-complete flag.
    LL_DMA_ClearFlag_TC2(DMA2);      
    // Check for data to process.
    checkData();                     
  }
}

// USART1 global interrupt: reacts to the IDLE-line event only.
// When the IDLE interrupt is enabled and its flag is set, the flag is
// cleared and checkData() is called to process the received bytes.
extern "C" void USART1_IRQHandler(void)
{
  // Nothing to do unless the IDLE interrupt is both enabled and pending.
  if (!LL_USART_IsEnabledIT_IDLE(USART1) || !LL_USART_IsActiveFlag_IDLE(USART1))
    return;

  LL_USART_ClearFlag_IDLE(USART1);
  checkData();
}

// One-time initialisation: configures the GPIO pins and the UART/DMA
// peripherals, flushes the UART data register, then sends a banner string
// over the UART.
void setup()
{
  GPIO_Init();
  UART_DMA_Init();
  __HAL_UART_FLUSH_DRREGISTER(&hUART1);
  // 16 is the length of the banner string (terminating NUL not sent);
  // 100 is the timeout argument.
  HAL_UART_Transmit(&hUART1, (uint8_t *)"USART/DMA Demo\r\n", 16, 100);
}

// Intentionally empty: received data is handled from the DMA and USART
// interrupt handlers.
void loop() { }

Code updated 7.17.21 to remove error, and correct attribution.

Posted in Uncategorized | Leave a comment

DMA in the IDE, Part V

DMA UART M2P Register-Level Programming

In the final post on STM32 DMA programming in the Arduino IDE, we will examine register-level setup of the peripherals. The demo program here is a simple refactoring of a previous program that utilizes DMA M2P via the UART.

Bare-Metal Programming

Register-level coding is sometimes referred to as scratch or bare-metal-programming. It involves foregoing the use of the HAL and SPL high-level library functions and directly reading and writing to the peripheral control registers. Typically, this type of programming will require frequent consultation with both the microcontroller datasheet and reference manual. At the end of each chapter in the reference manual is a section entitled Registers, that is particularly helpful.

The STM32F405 Reference Manual and Datasheet are both available online.

Once compiled and installed on the STM32F405 Feather board, this program simply sends “Hello World” through UART1 via the DMA2 M2P mode every time the button is pressed. The button must be connected to PB13, which is the pin labelled “SCK”.

Note: there are additional file and IDE setting requirements outlined in the comment section of the code.

To fully understand the intricacies of this code you must complete the uDemy course, ARM Cortex M Microcontroller DMA Programming Demystified. All I did was adapt the example code supplied with the course for the Adafruit STM32F405 Feather board and the Arduino IDE. Please note, I am not affiliated with this course or the uDemy website in any manner. I simply found this course to be very helpful when I was learning DMA programming on the STM32 series of microcontrollers.

The Code

// Register level application code.
// Memory to Peripheral (m2p) DMA Transfer.
// SRAM1 --> USART1_TX
//
// Adafruit STM32F405 Feather.
// Modified STM32F446RE Nucleo Board code.
//
// Button (PB13, or SCK) with external interrupt.
// Button connected to ground.
//
// Add build_opt.h file with -DHAL_UART_MODULE_ENABLED
// Tools > U(S)ART Support: Disabled.
// Add hal_conf_extra.h file with #define HAL_EXTI_MODULE_DISABLED

// Flag tests on the DMA2 high interrupt status register (HISR) for stream 7.
// Every macro is fully parenthesized and yields 0 or 1, so it can be combined
// safely with !, &&, == and other operators at the call site.
#define isHT()  ((DMA2->HISR & (1UL << 26)) != 0U)  // Half transfer.
#define isFT()  ((DMA2->HISR & (1UL << 27)) != 0U)  // Full transfer.
#define isTE()  ((DMA2->HISR & (1UL << 25)) != 0U)  // Transfer error.
#define isFE()  ((DMA2->HISR & (1UL << 22)) != 0U)  // FIFO error.
#define isDME() ((DMA2->HISR & (1UL << 24)) != 0U)  // Direct mode error.

// Message used as the DMA source buffer. The transfer length elsewhere in
// this sketch is sizeof(data), which also counts the terminating NUL.
char data[] ="Hello World\r\n";

// Test uart peripheral.
void sendSomeData(void) {
  USART_TypeDef *pUART1 = USART1;
  char someData[] = "Hello World\r\n";

  // Make sure that status register TXE is set (TDR is empty).
  // If TXE is 1, put a byte in DR.
  uint32_t len = sizeof(someData);

  for (uint32_t i=0 ; i<len ; i++) {
    // Waiting for TXE to become 1.
    while( !(pUART1->SR & (1 << 7)) ) ;

    pUART1->DR = someData[i];
  }
}

// Clears the pending bit of EXTI line 13 (the button line).
// EXTI->PR is a write-1-to-clear register, so only the bit for line 13 is
// written. A read-modify-write (|=) would write back every bit that is
// currently pending and silently acknowledge other EXTI lines too.
void clearExtiPendingBit(void) {
  if (EXTI->PR & (1UL << 13))
    EXTI->PR = (1UL << 13);
}

// IRQ handler for the button interrupt.
// Sets bit 7 of USART1 CR3 so that the UART issues its TX DMA request, then
// clears the pending EXTI bit for the button line.
extern "C" void EXTI15_10_IRQHandler(void)
{
  USART_TypeDef *pUART1 = USART1;

  // Send UART1_TX DMA request to DMA2 controller.
  pUART1->CR3 |= (1 << 7);
  clearExtiPendingBit();
}

// Enables DMA2 stream 7 by setting bit 0 of its control register.
void dma2EnableStream7(void) {
  DMA2_Stream7->CR |= (1 << 0);
}

// DMA error callbacks: each one halts in an endless loop.
// callbackErrorFE was previously misspelled "callabckErrorFE", which did not
// match the name called by DMA2_Stream7_IRQHandler().
void callbackErrorTE(void) { while(1); }   // Transfer error.
void callbackErrorFE(void) { while(1); }   // FIFO error.
void callbackErrorDME(void) { while(1); }  // Direct mode error.

// Half-transfer callback.
void callbackHTComplete(void) { /* Do nothing. */ }

// Full-transfer callback: re-arms DMA2 stream 7 for the next button press.
// Reloads the item count, clears bit 7 of USART1 CR3 (the DMA request set by
// the button interrupt) and enables the stream again.
// The name was previously misspelled "callabckFTComplete", which did not
// match the name called by DMA2_Stream7_IRQHandler().
void callbackFTComplete(void) {
  USART_TypeDef *pUART1 = USART1;
  DMA_Stream_TypeDef *pSTREAM7 = DMA2_Stream7;

  // Program number of data items to send.
  uint32_t len = sizeof(data);
  pSTREAM7->NDTR = len;

  // Withdraw the UART's DMA request until the button is pressed again.
  pUART1->CR3 &= ~(1 << 7);

  dma2EnableStream7();
}

// IRQ handler for DMA2 stream7 global interrupt.
// Services at most one event per invocation, checked in this order: half
// transfer, full transfer, transfer error, FIFO error, direct mode error.
// The matching flag is cleared through HIFCR before its callback runs.
extern "C" void DMA2_Stream7_IRQHandler(void) {
  // Half transfer.
  if (isHT()) {
    DMA2->HIFCR |= (1 << 26);
    callbackHTComplete();
    return;
  }

  // Full transfer.
  if (isFT()) {
    DMA2->HIFCR |= (1 << 27);
    callbackFTComplete();
    return;
  }

  // Transfer error.
  if (isTE()) {
    DMA2->HIFCR |= (1 << 25);
    callbackErrorTE();
    return;
  }

  // FIFO error.
  if (isFE()) {
    DMA2->HIFCR |= (1 << 22);
    callbackErrorFE();
    return;
  }

  // Direct mode error.
  if (isDME()) {
    DMA2->HIFCR |= (1 << 24);
    callbackErrorDME();
  }
}

// Enables the interrupt sources of DMA2 stream 7 (half transfer, transfer
// complete, transfer error, FIFO error, direct mode error) and then enables
// the stream's interrupt line in the NVIC.
void dma2InterruptConfig(void) {
  DMA_Stream_TypeDef *pSTREAM7 = DMA2_Stream7;

  // 1. Half-transfer IE (HTIE).
  pSTREAM7->CR |= (1 << 3);
  
  // 2. Transfer complete IE (TCIE).
  pSTREAM7->CR |= (1 << 4);
  
  // 3. Transfer error IE (TEIE).
  pSTREAM7->CR |= (1 << 2);
  
  // 4. FIFO overrun/underrun IE (FEIE). Note this bit lives in the FIFO
  //    control register (FCR), not in CR.
  pSTREAM7->FCR |= (1 << 7);
  
  // 5. Direct mode error (DMEIE).
  pSTREAM7->CR |= (1 << 1);
  
  // 6. Enable IRQ for DMA2 stream 7 global interrupt in NVIC.
  NVIC_EnableIRQ(DMA2_Stream7_IRQn);
}

// Configures DMA2 stream 7 for a memory-to-peripheral transfer from the
// data[] buffer to the USART1 data register. The stream itself is not
// enabled here; that is done by dma2EnableStream7().
void dma2Init(void) {
  RCC_TypeDef *pRCC = RCC;
  DMA_Stream_TypeDef *pSTREAM7 = DMA2_Stream7;
  USART_TypeDef *pUART1 = USART1;

  // 1. enable the peripheral clock for the dma2.
  pRCC->AHB1ENR |= (1 << 22);
  
  // 2. Identify the stream which is suitable for your peripheral.
  // <channel 4 , stream 7>.
  
  // 3. Identify the channel number on which uart1 peripheral sends DMA request.
  // <channel 4>. The 3-bit channel field at bit 25 is cleared, then set to 4.
  pSTREAM7->CR &= ~(0x7 << 25);
  pSTREAM7->CR |= (0x4 << 25);
  
  // 4. Program the source address (memory).
  pSTREAM7->M0AR = (uint32_t)data;
  
  // 5. Program the destination address (peripheral).
  pSTREAM7->PAR = (uint32_t)&pUART1->DR;
  
  // 6. Program number of data items to send.
  //    sizeof(data) includes the string's terminating NUL.
  uint32_t len = sizeof(data);
  pSTREAM7->NDTR = len;
  
  // 7. The direction of data transfer. m2p, p2m, m2m.
  //    Value 0x1 in the field at bit 6 is used for m2p here.
  pSTREAM7->CR |= (0x1 << 6);
  
  // 8. Program the source and destination data width.
  //    Both 2-bit width fields (bits 13 and 11) are cleared to zero.
  pSTREAM7->CR &= ~(0x3 << 13);
  pSTREAM7->CR &= ~(0x3 << 11);
  
  // 8a. Enable memory auto increment.
  pSTREAM7->CR |= (1 << 10);
  
  // 9. Direct mode or fifo mode.
  //    NOTE(review): setting FCR bit 2 presumably disables direct mode
  //    (i.e. selects FIFO mode) - confirm against the reference manual.
  pSTREAM7->FCR |= (1 << 2);
  
  // 10. Select the fifo threshold (2-bit field at bit 0 set to 0x3).
  pSTREAM7->FCR &= ~(0x3 << 0); // Clearing
  pSTREAM7->FCR |= (0x3 << 0);  // Setting
  
  // Steps 11-13 are not programmed; the corresponding register fields are
  // left as they are.
  // 11. Enable the circular mode if required.
  // 12. Single transfer or burst transfer.
  // 13. Configure the stream priority.
}

// USART1 configuration (PB6-->USART1_TX, PB7-->USART1_RX)
// Enables the USART1 and GPIOB clocks, switches PB6/PB7 to alternate
// function 7 with pull-ups, programs the baud rate register and enables the
// transmitter and the peripheral. All other UART settings keep their
// existing values.
void uart1Init(void) {
  RCC_TypeDef *pRCC = RCC;
  GPIO_TypeDef *pGPIOB = GPIOB;
  USART_TypeDef *pUART1 = USART1;

  // 1. Enable the peripheral clock for the UART1 peripheral.
  pRCC->APB2ENR |= (1 << 4);
  
  // 2. Configure the GPIO pins for uart_tx and uart_rx functionality.
  // Configure PB6 as TX.
  // 2.1 Enable the clock for the GPIOB peripheral.
  pRCC->AHB1ENR |= (1 << 1);
  
  // 2.2 Change the mode of PB6 to alternate function.
  //     MODER field for pin 6 = 0x2, AFR[0] field for pin 6 = 0x7.
  pGPIOB->MODER &= ~(0x3 << 12);
  pGPIOB->MODER |= (0x2 << 12);
  pGPIOB->AFR[0] &= ~(0xF << 24);
  pGPIOB->AFR[0] |= (0x7 << 24);
  
  // 2.3 Enable or disable Pull-up resistor for PB6.
  //     The field is OR-ed without being cleared first.
  pGPIOB->PUPDR |= (0x1 << 12);

  // Configure PB7 as USART1 RX.
  // 2.4 Change the mode of the PB7 to alternate function.
  pGPIOB->MODER  &= ~(0x3 << 14);
  pGPIOB->MODER  |= (0x2 << 14);
  pGPIOB->AFR[0] &= ~(0xF << 28);
  pGPIOB->AFR[0] |= (0x7 << 28);
  
  // 2.5 Enable or disable Pull-up resistor for PB7.
  pGPIOB->PUPDR |= (0x1 << 14);

  // 3. Configure baudrate.
  //    9600 baud @ 84 MHz (OVER8=0), Table 142: divider = 546.875.
  //    Integer part 546 = 0x222, placed one hex digit up (0x2220);
  //    fraction 0.875 * 16 = 0xE; together 0x222E.
  pUART1->BRR = 0x222e;
  
  // 4. Configure data width, # of stop bits, etc.
  // <no configuration required here, use default values>
  
  // 5. Enable TX for the UART peripheral.
  pUART1->CR1 |= (1 << 3);
  
  // 6. Enable UART peripheral.
  pUART1->CR1 |= (1 << 13);
}

// Configures the button on PB13 as a pulled-up input that raises an
// external interrupt (EXTI line 13) on a falling edge, and enables the
// EXTI15_10 interrupt in the NVIC.
void buttonInit(void)
{
  // Button is connected to PB13.
  GPIO_TypeDef *pGPIOB = GPIOB;
  RCC_TypeDef *pRCC = RCC;
  EXTI_TypeDef *pEXTI = EXTI;
  SYSCFG_TypeDef *pSYSCFG = SYSCFG;

  // 1. Enable the peripheral clock for the GPIOB peripheral.
  pRCC->AHB1ENR |= (1 << 1); 

  // 2. Keep GPIO pin in input mode (MODER field for pin 13 cleared to 0).
  pGPIOB->MODER &= ~(0x3 << 26);

  // 2.1 Enable pull-up resistor for PB13.
  pGPIOB->PUPDR |= (0x1 << 26);

  // 3. Enable the interrupt for the GPIO pin (unmask EXTI line 13).
  pEXTI->IMR |= (1 << 13);
  
  // 4. Enable the clock for SYSCFG.
  pRCC->APB2ENR |= (1 << 14);
  
  // 5. Configuring the SYSCFG CR4 register (EXTICR[3]).
  //    The 4-bit field at bit 4 is set to 0x1.
  //    NOTE(review): presumably this routes EXTI13 to port B - confirm
  //    against the reference manual.
  pSYSCFG->EXTICR[3] &= ~(0xF << 4); // Clearing
  pSYSCFG->EXTICR[3] |= (0x1 << 4);  // Set
  
  // 6. Configure edge detection for the EXTI 13 line (falling edge).
  pEXTI->FTSR |= (1 << 13);
  
  // 7. Enable the IRQ related to the GPIO pin in NVIC register of processor.
  NVIC_EnableIRQ(EXTI15_10_IRQn);
}

// One-time initialisation: button interrupt, USART1, then DMA2 stream 7
// (configuration, interrupt sources, and finally the stream enable).
void setup() {
  buttonInit();
  uart1Init();
  // Polled UART test, left disabled.
  //sendSomeData();
  dma2Init();
  dma2InterruptConfig();
  dma2EnableStream7();
}

// Intentionally empty: the transfer is driven by the button and DMA
// interrupt handlers.
void loop() { }

Posted in Uncategorized | Leave a comment

DMA in the IDE, Part IV

STM32 DMA ADC P2M Demo

This is the fourth example of DMA usage on the STM32F405 Feather board. Programming is from the Arduino IDE. This example demonstrates the peripheral to memory (P2M) DMA Mode. We send data from the ADC peripheral to memory via DMA.

This code sends the internal temperature of the MCU to the serial monitor every second.

The Code

// DMA ADC P2M Demo.
// Adafruit STM32F405 Feather.
//
// stm32f405 CubeMX Setup:
// ADC1, Temperature Sensor Channel.
// Continuous Conversion Mode: Enabled.
// DMA Continuous Requests: Enabled.
// Number of Conversion: 16.
// DMA Request, Add: ADC1
// [per stm32f405 Ref Man, pg 308, DMA2 Request Mapping Table]
// System Core, DMA2 Request, Channel 0, ADC1 Stream4.
// Increment memory by half-word (12-bit resolution).
//
// Tools > C Runtime Library: Newlib Nano + Float Scanf
//
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

// Set by the ADC conversion-complete callback; cleared in setup() and loop().
volatile bool adcDataAvailable __attribute__ ((aligned));
// Destination buffer for the 16 ADC samples moved by the DMA.
volatile uint16_t adcData[16];
// Time between conversions started from loop().
constexpr uint32_t INTERVAL = 1000; // ms.
// millis() value after which loop() starts the next conversion.
uint32_t t = millis();

// HAL handles for ADC1 and the DMA stream linked to it.
ADC_HandleTypeDef hadc1;
DMA_HandleTypeDef hdma_adc1;

// Handle DMA2 stream0 global interrupt (the stream assigned to hdma_adc1 in
// adcInit()) by forwarding it to the HAL DMA interrupt handler.
extern "C" void DMA2_Stream0_IRQHandler(void) {
  HAL_DMA_IRQHandler(&hdma_adc1);
}

// ADC conversion complete callback.
// Only raises the adcDataAvailable flag; the samples are processed in loop().
//   hadc - ADC handle (unused).
extern "C" void HAL_ADC_ConvCpltCallback(ADC_HandleTypeDef* hadc) { 
  UNUSED(hadc); 
  adcDataAvailable = true;
}

// Initialises ADC1 and its DMA stream for reading the internal temperature
// sensor: 12-bit right-aligned continuous conversions, software triggered,
// transferred half-word by half-word through DMA2 stream 0 / channel 0.
// Any HAL failure halts in an endless loop.
void adcInit(void) {
  ADC_ChannelConfTypeDef sConfig = {0};

  // ADC1 configuration.
  __HAL_RCC_ADC1_CLK_ENABLE();
  hadc1.Instance = ADC1;
  hadc1.Init.ClockPrescaler = ADC_CLOCK_SYNC_PCLK_DIV2;
  hadc1.Init.Resolution = ADC_RESOLUTION_12B;
  hadc1.Init.ScanConvMode = DISABLE;
  hadc1.Init.ContinuousConvMode = ENABLE;
  hadc1.Init.DiscontinuousConvMode = DISABLE;
  hadc1.Init.ExternalTrigConvEdge = ADC_EXTERNALTRIGCONVEDGE_NONE;
  hadc1.Init.ExternalTrigConv = ADC_SOFTWARE_START;
  hadc1.Init.DataAlign = ADC_DATAALIGN_RIGHT;
  hadc1.Init.NbrOfConversion = 16;
  hadc1.Init.DMAContinuousRequests = ENABLE;
  hadc1.Init.EOCSelection = ADC_EOC_SINGLE_CONV;
  if (HAL_ADC_Init(&hadc1) != HAL_OK)
    while(1);
  
  // DMA stream configuration: peripheral to memory, memory address
  // incremented, normal (non-circular) mode, FIFO disabled.
  hdma_adc1.Instance = DMA2_Stream0;
  hdma_adc1.Init.Channel = DMA_CHANNEL_0;
  hdma_adc1.Init.Direction = DMA_PERIPH_TO_MEMORY;
  hdma_adc1.Init.PeriphInc = DMA_PINC_DISABLE;
  hdma_adc1.Init.MemInc = DMA_MINC_ENABLE;
  hdma_adc1.Init.PeriphDataAlignment = DMA_PDATAALIGN_HALFWORD;
  hdma_adc1.Init.MemDataAlignment = DMA_MDATAALIGN_HALFWORD;
  hdma_adc1.Init.Mode = DMA_NORMAL;
  hdma_adc1.Init.Priority = DMA_PRIORITY_LOW;
  hdma_adc1.Init.FIFOMode = DMA_FIFOMODE_DISABLE;
  if (HAL_DMA_Init(&hdma_adc1) != HAL_OK)
    while(1);

  // Attach the DMA handle to the ADC handle.
  __HAL_LINKDMA(&hadc1, DMA_Handle, hdma_adc1);

  // Assign the temperature sensor channel to every rank 1..16.
  sConfig.Channel = ADC_CHANNEL_TEMPSENSOR;
  sConfig.SamplingTime = ADC_SAMPLETIME_3CYCLES;
  for (uint32_t i=1; i<=16; i++) {
    sConfig.Rank = i;
    if (HAL_ADC_ConfigChannel(&hadc1, &sConfig) != HAL_OK)
      while(1);
  }
}

// Enables the DMA2 clock and the DMA2 stream 0 interrupt in the NVIC
// (priority arguments 0, 0).
void dmaInit(void) {
  __HAL_RCC_DMA2_CLK_ENABLE();
  HAL_NVIC_SetPriority(DMA2_Stream0_IRQn, 0, 0);
  HAL_NVIC_EnableIRQ(DMA2_Stream0_IRQn);
}

// Convert sample to temperature.
// Averages the 16 samples in adcData (each entry is zeroed once it has been
// read) and converts the average to degrees Celsius with
//   Temp = (Vsense - V25) / Avg_Slope + 25.
// Returns the temperature in degrees Celsius.
float calcTemp() {
  // See STM32F405 Reference Manual pg. 413.
  // Avg_Slope is specified as 2.5 mV/C. Vsense and V25 are in volts here, so
  // the slope must also be in volts per degree; using 2.5 directly keeps the
  // result pinned within a fraction of a degree of 25 C.
  const float AVG_SLOPE = 0.0025f;              // V per degree C.
  const float V25 = 0.76f;                      // Sensor voltage at 25 C.
  const float ADC_TO_VOLT = 3.3 / (4096 - 1);   // Volts per ADC count.
  float adcValue = 0.0f;

  for (int i=0; i<16; i++) {
    adcValue += (float)adcData[i];
    adcData[i] = 0;
  }
  adcValue /= 16.0f;

  const float vSense = adcValue * ADC_TO_VOLT;
  return (vSense - V25) / AVG_SLOPE + 25.0f;
}

// One-time initialisation: DMA interrupt, ADC, then the serial port.
// Waits for the serial port to become ready before clearing the
// data-available flag.
void setup() {
  dmaInit();
  adcInit();
  Serial.begin(9600);
  while(!Serial);
  adcDataAvailable = false;
}

// Main loop: starts a 16-sample ADC/DMA conversion once per INTERVAL and
// prints the temperature whenever a finished conversion is flagged.
void loop(void) {
  const uint32_t now = millis();

  // t is the next deadline. The signed difference keeps the comparison valid
  // when millis() wraps around, which a plain "millis() > t" does not.
  if ((int32_t)(now - t) > 0) {
    t = now + INTERVAL;
    HAL_ADC_Start_DMA(&hadc1, (uint32_t*)adcData, 16);
  }
  
  if (adcDataAvailable) {
    // Sized generously and written with snprintf so that an unexpectedly
    // wide value is truncated instead of overrunning the buffer.
    char s[32] = {0};
     
    adcDataAvailable = false;
    snprintf(s, sizeof(s), "temp = %3.2f C", calcTemp());
    Serial.println(s);
  }
}
Posted in Uncategorized | Leave a comment

DMA in the IDE, Part III

STM32 DMA UART Transmit M2P Demo

This is the third example of DMA usage on the STM32F405 Feather board. Programming is via the Arduino IDE. This example demonstrates the memory to peripheral (M2P) DMA Mode. We send data from memory to the UART via DMA.

For this demo to work, the Arduino serial code needs to be disabled. The instructions are included in the code comments. Also, you will note some code inside the main loop function. The purpose of this code is to reset the transmit registers so the MCU acknowledges that the transmission is complete. This code would not be necessary except that the Arduino STM32 code uses the USART1_IRQHandler and therefore doesn’t reset after the UART1 transmit.

Make a serial connection to the SCL pin (PB6). Every press of the button sends a paragraph of text to the serial terminal.

The Code

// DMA USART1 transfer, M2P (memory to peripheral).
// Adafruit STM32F405 Feather.
//
// Add build_opt.h file with -DHAL_UART_MODULE_ENABLED
// Tools > U(S)ART Support: Disabled.
//
// SCL (PB6) TX pin.
// 10 (PB9) Button pin.
//

// Text sent over the UART by DMA on each button press. The literal is one
// string continued across source lines with trailing backslashes.
uint8_t data_stream[] =
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor \
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud \
exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure \
dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. \
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt \
mollit anim id est laborum.\r\n";

// Number of bytes handed to the DMA transfer; sizeof also counts the
// string's terminating NUL.
uint32_t data_length{sizeof(data_stream)};

// Debounce state.
// deBounce is written inside buttonISR() and read in loop(), so it is
// declared volatile to force every access to go to memory.
volatile uint32_t deBounce{0};
// Last deBounce value that loop() has already handled.
uint32_t lastBounce{0};
// Minimum time between accepted button presses, in ms.
constexpr uint32_t DEBOUNCE_INTERVAL{50};

// HAL handles for USART1 and its transmit DMA stream.
UART_HandleTypeDef huart1;
DMA_HandleTypeDef hdma_usart1_tx;

// DMA2 stream 7 interrupt: forwarded to the HAL handler for the USART1 TX DMA.
extern "C" void DMA2_Stream7_IRQHandler(void) { HAL_DMA_IRQHandler(&hdma_usart1_tx); }

// Initialises USART1 (9600 baud, 8 data bits, 1 stop bit, no parity, no flow
// control) on PB6/PB7 together with its transmit DMA stream (DMA2 stream 7,
// channel 4, memory to peripheral, byte-wide). Any HAL failure halts in an
// endless loop.
void uartInit(void) {
  huart1.Instance = USART1;
  huart1.Init.BaudRate = 9600;
  huart1.Init.WordLength = UART_WORDLENGTH_8B;
  huart1.Init.StopBits = UART_STOPBITS_1;
  huart1.Init.Parity = UART_PARITY_NONE;
  huart1.Init.Mode = UART_MODE_TX_RX;
  huart1.Init.HwFlowCtl = UART_HWCONTROL_NONE;
  huart1.Init.OverSampling = UART_OVERSAMPLING_16;

  GPIO_InitTypeDef GPIO_InitStruct = {0};
  
  __HAL_RCC_USART1_CLK_ENABLE();
  __HAL_RCC_GPIOB_CLK_ENABLE();
  
  // USART1 GPIO Configuration    
  // PB6 --> USART1_TX
  // PB7 --> USART1_RX 
  GPIO_InitStruct.Pin = GPIO_PIN_6 | GPIO_PIN_7;
  GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
  GPIO_InitStruct.Pull = GPIO_PULLUP;
  GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
  GPIO_InitStruct.Alternate = GPIO_AF7_USART1;
  HAL_GPIO_Init(GPIOB, &GPIO_InitStruct);

  // Transmit DMA stream: memory address incremented, normal (non-circular)
  // mode, FIFO disabled.
  hdma_usart1_tx.Instance = DMA2_Stream7;
  hdma_usart1_tx.Init.Channel = DMA_CHANNEL_4;
  hdma_usart1_tx.Init.Direction = DMA_MEMORY_TO_PERIPH;
  hdma_usart1_tx.Init.PeriphInc = DMA_PINC_DISABLE;
  hdma_usart1_tx.Init.MemInc = DMA_MINC_ENABLE;
  hdma_usart1_tx.Init.PeriphDataAlignment = DMA_PDATAALIGN_BYTE;
  hdma_usart1_tx.Init.MemDataAlignment = DMA_MDATAALIGN_BYTE;
  hdma_usart1_tx.Init.Mode = DMA_NORMAL;
  hdma_usart1_tx.Init.Priority = DMA_PRIORITY_LOW;
  hdma_usart1_tx.Init.FIFOMode = DMA_FIFOMODE_DISABLE;
  if (HAL_DMA_Init(&hdma_usart1_tx) != HAL_OK)
      while(1);
      
  // Attach the DMA handle as the UART's transmit DMA.
  __HAL_LINKDMA(&huart1, hdmatx, hdma_usart1_tx);

  if (HAL_UART_Init(&huart1) != HAL_OK)
    while(1);
}

// Enables the DMA2 clock and the DMA2 stream 7 interrupt in the NVIC
// (priority arguments 0, 0).
void dmaInit(void) {
  __HAL_RCC_DMA2_CLK_ENABLE();
  HAL_NVIC_SetPriority(DMA2_Stream7_IRQn, 0, 0);
  HAL_NVIC_EnableIRQ(DMA2_Stream7_IRQn);
}

// Button interrupt handler.
// Ignores edges that arrive before the current debounce deadline; otherwise
// moves the deadline DEBOUNCE_INTERVAL ms ahead and starts the DMA transmit
// of data_stream.
void buttonISR() {
  // Still inside the debounce window: treat as contact bounce.
  if (millis() <= deBounce)
    return;

  deBounce = millis() + DEBOUNCE_INTERVAL;
  HAL_UART_Transmit_DMA(&huart1, data_stream, (uint16_t)data_length);
}

// One-time initialisation: DMA interrupt, UART, then the button input with a
// falling-edge interrupt routed to buttonISR().
void setup(void) {
  dmaInit();
  uartInit();
  // The header comment of this sketch lists the button as "10 (PB9)":
  // the pull-up is set by pin number, the interrupt is attached by pin name.
  pinMode(10, INPUT_PULLUP);
  attachInterrupt(digitalPinToInterrupt(PB9), buttonISR, FALLING);
}

// Main loop: after each accepted button press (deBounce changed since the
// last pass) waits for the UART transmission to end and then puts the HAL
// UART handle back into the ready state so the next DMA transmit can start.
void loop() {
  if (deBounce != lastBounce) {
    lastBounce = deBounce;
    // Wait for the end of transmission: spin while the TC status flag is
    // clear and the TC interrupt enable bit is also clear.
    while ( !(huart1.Instance->SR & USART_SR_TC) && 
            !(huart1.Instance->CR1 & USART_CR1_TCIE) ) ;
    // Disable TXEIE and TCIE interrupts. 
    huart1.Instance->CR1 &= ~(USART_CR1_TXEIE | USART_CR1_TCIE);
    // At end of Tx process, restore huart->gState to ready. 
    huart1.gState = HAL_UART_STATE_READY;
 } 
}

Posted in Uncategorized | Leave a comment

DMA in the IDE, Part II

STM32 DMA UART Receive P2M Demo

This is the second example of DMA usage on the STM32F405 Feather board. Programming is from the Arduino IDE. This example demonstrates the peripheral to memory (P2M) DMA Mode. We send serial data to the board via UART DMA storing the data into SRAM memory.

For this demo to work, the Arduino serial code needs to be disabled. The instructions are included in the code comments.

Make a serial connection to the SDA pin (PB7). Every time the board receives 5 characters, it toggles the onboard LED.

The Code

// USART1 Rx to SRAM DMA transfer.
// Interrupt mode, transfer complete initiates a callback.
//
// Add build_opt.h file to project with -DHAL_UART_MODULE_ENABLED.
// Tools > U(S)ART Support: Disabled.
// Change option in file: stm32f4xx_hal_conf_default.h
// Located here: 
// C:\Users\default.LAPTOP-7V09ROBA\AppData\Local\Arduino15\packages\STM32\hardware\stm32\1.9.0\system\STM32F4xx
// #define USE_HAL_UART_REGISTER_CALLBACKS 1U
//
// Use TeraTerm to connect to board at 9600 baud.
// USART1 RX is on SDA pin, PB7.
// Every 5 chars sent should toggle LED.
//
// LED toggled by the receive-complete callback (port C, pin 1).
#define LED_Pin       GPIO_PIN_1
#define LED_GPIO_Port GPIOC

// Number of bytes per DMA reception, and the buffer they are stored in.
#define DATA_LENGTH 5
uint8_t data[DATA_LENGTH];

// HAL handles for USART1 and its receive DMA stream.
UART_HandleTypeDef huart1;
DMA_HandleTypeDef hdma_usart1_rx;

// Receive-complete callback (registered in setup()): toggles the LED pin.
//   huart - UART handle (unused).
extern "C" void myCallback(UART_HandleTypeDef *huart) {
  UNUSED(huart);
  HAL_GPIO_TogglePin(LED_GPIO_Port, LED_Pin);
}

// DMA2 stream 2 interrupt: forwarded to the HAL handler for the USART1 RX DMA.
extern "C" void DMA2_Stream2_IRQHandler(void) {
  HAL_DMA_IRQHandler(&hdma_usart1_rx);
}

// Initialises USART1 (9600 baud, 8 data bits, 1 stop bit, no parity, no flow
// control) together with its receive DMA stream (DMA2 stream 2, channel 4,
// peripheral to memory, byte-wide). Any HAL failure halts in an endless loop.
void uartInit(void) {
  huart1.Instance = USART1;
  huart1.Init.BaudRate = 9600;
  huart1.Init.WordLength = UART_WORDLENGTH_8B;
  huart1.Init.StopBits = UART_STOPBITS_1;
  huart1.Init.Parity = UART_PARITY_NONE;
  huart1.Init.Mode = UART_MODE_TX_RX;
  huart1.Init.HwFlowCtl = UART_HWCONTROL_NONE;
  huart1.Init.OverSampling = UART_OVERSAMPLING_16;

  // Receive DMA stream: memory address incremented, normal (non-circular)
  // mode, FIFO disabled.
  __HAL_RCC_USART1_CLK_ENABLE();
  hdma_usart1_rx.Instance = DMA2_Stream2;
  hdma_usart1_rx.Init.Channel = DMA_CHANNEL_4;
  hdma_usart1_rx.Init.Direction = DMA_PERIPH_TO_MEMORY;
  hdma_usart1_rx.Init.PeriphInc = DMA_PINC_DISABLE;
  hdma_usart1_rx.Init.MemInc = DMA_MINC_ENABLE;
  hdma_usart1_rx.Init.PeriphDataAlignment = DMA_PDATAALIGN_BYTE;
  hdma_usart1_rx.Init.MemDataAlignment = DMA_MDATAALIGN_BYTE;
  hdma_usart1_rx.Init.Mode = DMA_NORMAL;
  hdma_usart1_rx.Init.Priority = DMA_PRIORITY_LOW;
  hdma_usart1_rx.Init.FIFOMode = DMA_FIFOMODE_DISABLE;
  if (HAL_DMA_Init(&hdma_usart1_rx) != HAL_OK)
    while(1);

  // Attach the DMA handle as the UART's receive DMA.
  __HAL_LINKDMA(&huart1, hdmarx, hdma_usart1_rx);

  // The while(1) below is the body of this if, despite its indentation.
  if (HAL_UART_Init(&huart1) != HAL_OK)
  while(1);
}

// Enables the DMA2 clock and the DMA2 stream 2 interrupt in the NVIC
// (priority arguments 0, 0).
void dmaInit(void) {
  __HAL_RCC_DMA2_CLK_ENABLE();
  HAL_NVIC_SetPriority(DMA2_Stream2_IRQn, 0, 0);
  HAL_NVIC_EnableIRQ(DMA2_Stream2_IRQn);
}

// Enables the GPIOB clock and configures PB6/PB7 as the USART1 TX/RX
// alternate-function pins (push-pull, pull-up, very high speed).
void gpioInit(void) {
  // Not zero-initialised; every field is assigned below before use.
  GPIO_InitTypeDef GPIO_InitStruct;

  __HAL_RCC_GPIOB_CLK_ENABLE();

  // USART1 GPIO Configuration.
  // PB6 --> USART1_TX
  // PB7 --> USART1_RX 
  GPIO_InitStruct.Pin = GPIO_PIN_6 | GPIO_PIN_7;
  GPIO_InitStruct.Mode = GPIO_MODE_AF_PP;
  GPIO_InitStruct.Pull = GPIO_PULLUP;
  GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_VERY_HIGH;
  GPIO_InitStruct.Alternate = GPIO_AF7_USART1;
  HAL_GPIO_Init(GPIOB, &GPIO_InitStruct);
}

// One-time initialisation: GPIO, DMA interrupt and UART, then the LED output
// and the receive-complete callback.
void setup() { 
  gpioInit();
  dmaInit();
  uartInit();
  // NOTE(review): the callback toggles LED_GPIO_Port/LED_Pin (PC1);
  // presumably LED_BUILTIN maps to the same pin - confirm.
  pinMode(LED_BUILTIN, OUTPUT);
  // Have the HAL call myCallback() when a DMA reception completes.
  HAL_UART_RegisterCallback(&huart1, HAL_UART_RX_COMPLETE_CB_ID, (pUART_CallbackTypeDef)myCallback);
}

// Main loop: flushes the UART data register once, then requests a
// DATA_LENGTH-byte DMA reception into data[] over and over. This function
// never returns.
void loop() {
  __HAL_UART_FLUSH_DRREGISTER(&huart1);

  for (;;) {
    HAL_UART_Receive_DMA(&huart1, (uint8_t *)data, DATA_LENGTH);
  }
}
Posted in Uncategorized | Leave a comment