% Hutter & Schwabe, Cryptology ePrint Archive report 2014/592.
% The URL lives in its own `url` field (no \url{} markup in howpublished),
% the acronym in the title is brace-protected against style recasing, and
% percent signs in the abstract are escaped so that styles which print the
% abstract do not treat them as LaTeX comment characters.
@misc{Hutter2014MultiprecisionMultiplicationOn,
  author        = {Hutter, Michael and Schwabe, Peter},
  title         = {Multiprecision multiplication on {AVR} revisited},
  howpublished  = {Cryptology ePrint Archive, Report 2014/592},
  year          = {2014},
  url           = {https://eprint.iacr.org/2014/592.pdf},
  keywords      = {Karatsuba multiplication, microcontroller, ATmega},
  abstract      = {This paper presents new speed records for multiprecision multiplication on the AVR ATmega family of 8-bit microcontrollers. For example, our software takes only 1976 cycles for the multiplication of two 160-bit integers; this is more than 15\% faster than previous work. For 256-bit inputs, our software is not only the first to break through the 6000-cycle barrier; with only 4797 cycles it also breaks through the 5000-cycle barrier and is more than 21\% faster than previous work. We achieve these speed records by carefully optimizing the Karatsuba multiplication technique for AVR ATmega. One might expect that subquadratic-complexity Karatsuba multiplication is only faster than algorithms with quadratic complexity for large inputs. This paper shows that it is in fact faster than fully unrolled product-scanning multiplication already for surprisingly small inputs, starting at 48 bits. Our results thus make Karatsuba multiplication the method of choice for high-performance implementations of elliptic-curve cryptography on AVR ATmega microcontrollers.},
}