author        = {Michael Hutter and Erich Wenger},
  title         = {Fast Multi-Precision Multiplication for Public-Key Cryptography on Embedded Microprocessors},
  booktitle     = {Cryptographic Hardware and Embedded Systems -- CHES 2011, 13th International Workshop, Nara, Japan, September 28 - October 1},
  year          = {2011},
  editor        = {Bart Preneel and Tsuyoshi Takagi},
  volume        = {6917},
  series        = {Lecture Notes in Computer Science},
  pages         = {459--474},
  publisher     = {Springer},
  doi           = {10.1007/978-3-642-23951-9_30},
  keywords      = {Multi-precision Arithmetic, Microprocessors, Elliptic Curve Cryptography, RSA, Embedded Devices},
  url           = {http://www.springerlink.com/content/166hl834k55r5454/},
  abstract      = {Multi-precision multiplication is one of the most fundamental operations on microprocessors to allow public-key cryptography such as RSA and Elliptic Curve Cryptography (ECC). In this paper, we present a novel multiplication technique that increases the performance of multiplication by sophisticated caching of operands. Our method significantly reduces the number of needed load instructions, which is usually one of the most expensive operations on modern processors. We evaluate our new technique on an 8-bit ATmega128 microcontroller and compare the result with existing solutions. Our implementation needs only 2,395 clock cycles for a 160-bit multiplication which outperforms related work by a factor of 10\% to 23\%. The number of required load instructions is reduced from 167 (needed for the best known hybrid multiplication) to only 80. Our implementation scales very well even for larger integer sizes (required for RSA) and limited register sets. It further fully complies with existing multiply-accumulate instructions that are integrated in most of the available processors.}