BL/TF optimizations. 3DES: +20% speed

- Blowfish/Twofish small optimizations

- 3DES +20% speedup from Patrick's commit:
f596c577cf
This commit is contained in:
Hans-Jürgen Petrich 2013-05-23 01:11:08 +07:00
parent 55ff00cc35
commit 72fa880aa2
3 changed files with 141 additions and 112 deletions

View File

@ -587,17 +587,13 @@ class Crypt_Blowfish extends Crypt_Base {
break; break;
default: default:
$p = array(); $p = array();
$pin = '';
for ($i = 0; $i < 18; ++$i) { for ($i = 0; $i < 18; ++$i) {
$p[] = '$p_' . $i; $p[] = '$p_' . $i;
$pin.= '$p_' . $i . ' = $self->bctx["p"][' . $i . '];' . "\n";
} }
$init_crypt = ' $init_crypt = '
$sb_0 = $self->bctx["sb"][0]; list($sb_0, $sb_1, $sb_2, $sb_3) = $self->bctx["sb"];
$sb_1 = $self->bctx["sb"][1]; list(' . implode(',', $p) . ') = $self->bctx["p"];
$sb_2 = $self->bctx["sb"][2];
$sb_3 = $self->bctx["sb"][3];
' . $pin . '
'; ';
} }

View File

@ -759,37 +759,43 @@ class Crypt_DES extends Crypt_Base {
*/ */
function _processBlock($block, $mode) function _processBlock($block, $mode)
{ {
$shuffle = $this->shuffle; static $sbox1, $sbox2, $sbox3, $sbox4, $sbox5, $sbox6, $sbox7, $sbox8, $shuffleip, $shuffleinvip;
$invipmap = $this->invipmap; if (!$sbox1) {
$ipmap = $this->ipmap; $sbox1 = array_map("intval", $this->sbox1);
$sbox1 = $this->sbox1; $sbox2 = array_map("intval", $this->sbox2);
$sbox2 = $this->sbox2; $sbox3 = array_map("intval", $this->sbox3);
$sbox3 = $this->sbox3; $sbox4 = array_map("intval", $this->sbox4);
$sbox4 = $this->sbox4; $sbox5 = array_map("intval", $this->sbox5);
$sbox5 = $this->sbox5; $sbox6 = array_map("intval", $this->sbox6);
$sbox6 = $this->sbox6; $sbox7 = array_map("intval", $this->sbox7);
$sbox7 = $this->sbox7; $sbox8 = array_map("intval", $this->sbox8);
$sbox8 = $this->sbox8; /* Merge $shuffle with $[inv]ipmap */
for ($i = 0; $i < 256; ++$i) {
$shuffleip[] = $this->shuffle[$this->ipmap[$i]];
$shuffleinvip[] = $this->shuffle[$this->invipmap[$i]];
}
}
$keys = $this->keys[$mode]; $keys = $this->keys[$mode];
$ki = -1;
// Do the initial IP permutation.
$t = unpack('Nl/Nr', $block);
list($l, $r) = array($t['l'], $t['r']);
$block = ($shuffleip[ $r & 0xFF] & "\x80\x80\x80\x80\x80\x80\x80\x80") |
($shuffleip[($r >> 8) & 0xFF] & "\x40\x40\x40\x40\x40\x40\x40\x40") |
($shuffleip[($r >> 16) & 0xFF] & "\x20\x20\x20\x20\x20\x20\x20\x20") |
($shuffleip[($r >> 24) & 0xFF] & "\x10\x10\x10\x10\x10\x10\x10\x10") |
($shuffleip[ $l & 0xFF] & "\x08\x08\x08\x08\x08\x08\x08\x08") |
($shuffleip[($l >> 8) & 0xFF] & "\x04\x04\x04\x04\x04\x04\x04\x04") |
($shuffleip[($l >> 16) & 0xFF] & "\x02\x02\x02\x02\x02\x02\x02\x02") |
($shuffleip[($l >> 24) & 0xFF] & "\x01\x01\x01\x01\x01\x01\x01\x01");
// Extract L0 and R0.
$t = unpack('Nl/Nr', $block);
list($l, $r) = array($t['l'], $t['r']);
$ki = -1;
for ($des_round = 0; $des_round < $this->des_rounds; ++$des_round) { for ($des_round = 0; $des_round < $this->des_rounds; ++$des_round) {
// Do the initial IP permutation.
$t = unpack('Nl/Nr', $block);
list($l, $r) = array($t['l'], $t['r']);
$block = ($shuffle[$ipmap[ $r & 0xFF]] & "\x80\x80\x80\x80\x80\x80\x80\x80") |
($shuffle[$ipmap[($r >> 8) & 0xFF]] & "\x40\x40\x40\x40\x40\x40\x40\x40") |
($shuffle[$ipmap[($r >> 16) & 0xFF]] & "\x20\x20\x20\x20\x20\x20\x20\x20") |
($shuffle[$ipmap[($r >> 24) & 0xFF]] & "\x10\x10\x10\x10\x10\x10\x10\x10") |
($shuffle[$ipmap[ $l & 0xFF]] & "\x08\x08\x08\x08\x08\x08\x08\x08") |
($shuffle[$ipmap[($l >> 8) & 0xFF]] & "\x04\x04\x04\x04\x04\x04\x04\x04") |
($shuffle[$ipmap[($l >> 16) & 0xFF]] & "\x02\x02\x02\x02\x02\x02\x02\x02") |
($shuffle[$ipmap[($l >> 24) & 0xFF]] & "\x01\x01\x01\x01\x01\x01\x01\x01");
// Extract L0 and R0.
$t = unpack('Nl/Nr', $block);
list($l, $r) = array($t['l'], $t['r']);
// Perform the 16 steps. // Perform the 16 steps.
for ($i = 0; $i < 16; $i++) { for ($i = 0; $i < 16; $i++) {
// start of "the Feistel (F) function" - see the following URL: // start of "the Feistel (F) function" - see the following URL:
@ -809,17 +815,21 @@ class Crypt_DES extends Crypt_Base {
$r = $t; $r = $t;
} }
// Perform the inverse IP permutation. // Last step should not permute L & R.
$block = ($shuffle[$invipmap[($l >> 24) & 0xFF]] & "\x80\x80\x80\x80\x80\x80\x80\x80") | $t = $l;
($shuffle[$invipmap[($r >> 24) & 0xFF]] & "\x40\x40\x40\x40\x40\x40\x40\x40") | $l = $r;
($shuffle[$invipmap[($l >> 16) & 0xFF]] & "\x20\x20\x20\x20\x20\x20\x20\x20") | $r = $t;
($shuffle[$invipmap[($r >> 16) & 0xFF]] & "\x10\x10\x10\x10\x10\x10\x10\x10") |
($shuffle[$invipmap[($l >> 8) & 0xFF]] & "\x08\x08\x08\x08\x08\x08\x08\x08") |
($shuffle[$invipmap[($r >> 8) & 0xFF]] & "\x04\x04\x04\x04\x04\x04\x04\x04") |
($shuffle[$invipmap[ $l & 0xFF]] & "\x02\x02\x02\x02\x02\x02\x02\x02") |
($shuffle[$invipmap[ $r & 0xFF]] & "\x01\x01\x01\x01\x01\x01\x01\x01");
} }
return $block;
// Perform the inverse IP permutation.
return ($shuffleinvip[($r >> 24) & 0xFF] & "\x80\x80\x80\x80\x80\x80\x80\x80") |
($shuffleinvip[($l >> 24) & 0xFF] & "\x40\x40\x40\x40\x40\x40\x40\x40") |
($shuffleinvip[($r >> 16) & 0xFF] & "\x20\x20\x20\x20\x20\x20\x20\x20") |
($shuffleinvip[($l >> 16) & 0xFF] & "\x10\x10\x10\x10\x10\x10\x10\x10") |
($shuffleinvip[($r >> 8) & 0xFF] & "\x08\x08\x08\x08\x08\x08\x08\x08") |
($shuffleinvip[($l >> 8) & 0xFF] & "\x04\x04\x04\x04\x04\x04\x04\x04") |
($shuffleinvip[ $r & 0xFF] & "\x02\x02\x02\x02\x02\x02\x02\x02") |
($shuffleinvip[ $l & 0xFF] & "\x01\x01\x01\x01\x01\x01\x01\x01");
} }
/** /**
@ -1364,37 +1374,56 @@ class Crypt_DES extends Crypt_Base {
function inline_crypt_setup() function inline_crypt_setup()
{ {
$lambda_functions =& Crypt_DES::get_lambda_functions(); $lambda_functions =& Crypt_DES::get_lambda_functions();
// Engine configuration for:
// - DES ($des_rounds == 1) or
// - 3DES ($des_rounds == 3)
$des_rounds = $this->des_rounds; $des_rounds = $this->des_rounds;
// We create max. 10 hi-optimized code for memory reason. Means: For each $key one ultra fast inline-crypt function. // We create max. 10 hi-optimized code for memory reason. Means: For each $key one ultra fast inline-crypt function.
// After that, we'll still create very fast optimized code but not the hi-ultimative code, for each $mode one // After that, we'll still create very fast optimized code but not the hi-ultimative code, for each $mode one
$gen_hi_opt_code = (bool)( count($lambda_functions) < 10 ); $gen_hi_opt_code = (bool)( count($lambda_functions) < 10 );
// Generation of a unique hash for our generated code
switch (true) { switch (true) {
case $gen_hi_opt_code: case $gen_hi_opt_code:
// For hi-optimized code, we create for each combination of
// $mode, $des_rounds and $this->key its own encrypt/decrypt function.
$code_hash = md5(str_pad("Crypt_DES, $des_rounds, {$this->mode}, ", 32, "\0") . $this->key); $code_hash = md5(str_pad("Crypt_DES, $des_rounds, {$this->mode}, ", 32, "\0") . $this->key);
break; break;
default: default:
// After at most 10 hi-optimized functions, we create generic
// (still very fast, but not ultra) functions for each $mode/$des_rounds.
// Currently, at most 2 * 5 generic functions are then possible.
$code_hash = "Crypt_DES, $des_rounds, {$this->mode}"; $code_hash = "Crypt_DES, $des_rounds, {$this->mode}";
} }
// Is there a re-usable $lambda_functions in there? If not, we have to create it.
if (!isset($lambda_functions[$code_hash])) { if (!isset($lambda_functions[$code_hash])) {
$init_crypt = ' // Init code for both, encrypt and decrypt.
$shuffle = $self->shuffle; $init_crypt = 'static $sbox1, $sbox2, $sbox3, $sbox4, $sbox5, $sbox6, $sbox7, $sbox8, $shuffleip, $shuffleinvip;
$invipmap = $self->invipmap; if (!$sbox1) {
$ipmap = $self->ipmap; $sbox1 = array_map("intval", $self->sbox1);
$sbox1 = $self->sbox1; $sbox2 = array_map("intval", $self->sbox2);
$sbox2 = $self->sbox2; $sbox3 = array_map("intval", $self->sbox3);
$sbox3 = $self->sbox3; $sbox4 = array_map("intval", $self->sbox4);
$sbox4 = $self->sbox4; $sbox5 = array_map("intval", $self->sbox5);
$sbox5 = $self->sbox5; $sbox6 = array_map("intval", $self->sbox6);
$sbox6 = $self->sbox6; $sbox7 = array_map("intval", $self->sbox7);
$sbox7 = $self->sbox7; $sbox8 = array_map("intval", $self->sbox8);'
$sbox8 = $self->sbox8; /* Merge $shuffle with $[inv]ipmap */ . '
for ($i = 0; $i < 256; ++$i) {
$shuffleip[] = $self->shuffle[$self->ipmap[$i]];
$shuffleinvip[] = $self->shuffle[$self->invipmap[$i]];
}
}
'; ';
switch (true) { switch (true) {
case $gen_hi_opt_code: case $gen_hi_opt_code:
// In Hi-optimized code mode, we use our [3]DES key schedule as hardcoded integers.
// No further initialisation of the $keys schedule is necessary.
// That is the extra performance boost.
$k = array( $k = array(
CRYPT_DES_ENCRYPT => $this->keys[CRYPT_DES_ENCRYPT], CRYPT_DES_ENCRYPT => $this->keys[CRYPT_DES_ENCRYPT],
CRYPT_DES_DECRYPT => $this->keys[CRYPT_DES_DECRYPT] CRYPT_DES_DECRYPT => $this->keys[CRYPT_DES_DECRYPT]
@ -1403,6 +1432,8 @@ class Crypt_DES extends Crypt_Base {
$init_decrypt = ''; $init_decrypt = '';
break; break;
default: default:
// In generic optimized code mode, we have to use, as the best compromise [currently],
// our key schedule as $ke/$kd arrays. (with hardcoded indexes...)
$k = array( $k = array(
CRYPT_DES_ENCRYPT => array(), CRYPT_DES_ENCRYPT => array(),
CRYPT_DES_DECRYPT => array() CRYPT_DES_DECRYPT => array()
@ -1416,69 +1447,71 @@ class Crypt_DES extends Crypt_Base {
break; break;
} }
// Creating code for en- and decryption.
$crypt_block = array(); $crypt_block = array();
foreach (array(CRYPT_DES_ENCRYPT, CRYPT_DES_DECRYPT) as $c) { foreach (array(CRYPT_DES_ENCRYPT, CRYPT_DES_DECRYPT) as $c) {
$crypt_block[$c] = '$in = unpack("N*", $in);'."\n"; /* Do the initial IP permutation. */
$crypt_block[$c] = '
$in = unpack("N*", $in);
$l = $in[1];
$r = $in[2];
$in = unpack("N*",
($shuffleip[ $r & 0xFF] & "\x80\x80\x80\x80\x80\x80\x80\x80") |
($shuffleip[($r >> 8) & 0xFF] & "\x40\x40\x40\x40\x40\x40\x40\x40") |
($shuffleip[($r >> 16) & 0xFF] & "\x20\x20\x20\x20\x20\x20\x20\x20") |
($shuffleip[($r >> 24) & 0xFF] & "\x10\x10\x10\x10\x10\x10\x10\x10") |
($shuffleip[ $l & 0xFF] & "\x08\x08\x08\x08\x08\x08\x08\x08") |
($shuffleip[($l >> 8) & 0xFF] & "\x04\x04\x04\x04\x04\x04\x04\x04") |
($shuffleip[($l >> 16) & 0xFF] & "\x02\x02\x02\x02\x02\x02\x02\x02") |
($shuffleip[($l >> 24) & 0xFF] & "\x01\x01\x01\x01\x01\x01\x01\x01")
);
' . /* Extract L0 and R0 */ '
$l = $in[1];
$r = $in[2];
';
$l = '$l';
$r = '$r';
// Perform DES or 3DES.
for ($ki = -1, $des_round = 0; $des_round < $des_rounds; ++$des_round) { for ($ki = -1, $des_round = 0; $des_round < $des_rounds; ++$des_round) {
// Do the initial IP permutation.
$crypt_block[$c].= '
$l = $in[1];
$r = $in[2];
$in = unpack("N*",
($shuffle[$ipmap[ $r & 0xFF]] & "\x80\x80\x80\x80\x80\x80\x80\x80") |
($shuffle[$ipmap[($r >> 8) & 0xFF]] & "\x40\x40\x40\x40\x40\x40\x40\x40") |
($shuffle[$ipmap[($r >> 16) & 0xFF]] & "\x20\x20\x20\x20\x20\x20\x20\x20") |
($shuffle[$ipmap[($r >> 24) & 0xFF]] & "\x10\x10\x10\x10\x10\x10\x10\x10") |
($shuffle[$ipmap[ $l & 0xFF]] & "\x08\x08\x08\x08\x08\x08\x08\x08") |
($shuffle[$ipmap[($l >> 8) & 0xFF]] & "\x04\x04\x04\x04\x04\x04\x04\x04") |
($shuffle[$ipmap[($l >> 16) & 0xFF]] & "\x02\x02\x02\x02\x02\x02\x02\x02") |
($shuffle[$ipmap[($l >> 24) & 0xFF]] & "\x01\x01\x01\x01\x01\x01\x01\x01")
);
'.'' /* Extract L0 and R0 */ .'
$l = $in[1];
$r = $in[2];
';
// Perform the 16 steps. // Perform the 16 steps.
// start of "the Feistel (F) function" - see the following URL: for ($i = 0; $i < 16; ++$i) {
// http://en.wikipedia.org/wiki/Image:Data_Encryption_Standard_InfoBox_Diagram.png // start of "the Feistel (F) function" - see the following URL:
// Merge key schedule. // http://en.wikipedia.org/wiki/Image:Data_Encryption_Standard_InfoBox_Diagram.png
for ($i = 0; $i < 8; ++$i) { // Merge key schedule.
$crypt_block[$c].= ' $crypt_block[$c].= '
$b1 = (($r >> 3) & 0x1FFFFFFF) ^ ($r << 29) ^ ' .$k[$c][++$ki] . '; $b1 = ((' . $r . ' >> 3) & 0x1FFFFFFF) ^ (' . $r . ' << 29) ^ ' . $k[$c][++$ki] . ';
$b2 = (($r >> 31) & 0x00000001) ^ ($r << 1) ^ ' .$k[$c][++$ki] . '; $b2 = ((' . $r . ' >> 31) & 0x00000001) ^ (' . $r . ' << 1) ^ ' . $k[$c][++$ki] . ';' .
$l = $sbox1[($b1 >> 24) & 0x3F] ^ $sbox2[($b2 >> 24) & 0x3F] ^ /* S-box indexing. */
$sbox3[($b1 >> 16) & 0x3F] ^ $sbox4[($b2 >> 16) & 0x3F] ^ $l . ' = $sbox1[($b1 >> 24) & 0x3F] ^ $sbox2[($b2 >> 24) & 0x3F] ^
$sbox5[($b1 >> 8) & 0x3F] ^ $sbox6[($b2 >> 8) & 0x3F] ^ $sbox3[($b1 >> 16) & 0x3F] ^ $sbox4[($b2 >> 16) & 0x3F] ^
$sbox7[ $b1 & 0x3F] ^ $sbox8[ $b2 & 0x3F] ^ $l; $sbox5[($b1 >> 8) & 0x3F] ^ $sbox6[($b2 >> 8) & 0x3F] ^
$sbox7[ $b1 & 0x3F] ^ $sbox8[ $b2 & 0x3F] ^ ' . $l . ';
$b1 = (($l >> 3) & 0x1FFFFFFF) ^ ($l << 29) ^ ' .$k[$c][++$ki] . ';
$b2 = (($l >> 31) & 0x00000001) ^ ($l << 1) ^ ' .$k[$c][++$ki] . ';
$r = $sbox1[($b1 >> 24) & 0x3F] ^ $sbox2[($b2 >> 24) & 0x3F] ^
$sbox3[($b1 >> 16) & 0x3F] ^ $sbox4[($b2 >> 16) & 0x3F] ^
$sbox5[($b1 >> 8) & 0x3F] ^ $sbox6[($b2 >> 8) & 0x3F] ^
$sbox7[ $b1 & 0x3F] ^ $sbox8[ $b2 & 0x3F] ^ $r;
'; ';
} // end of "the Feistel (F) function"
// Perform the inverse IP permutation. // swap L & R
$crypt_block[$c].= '$in = ' . ($des_round == $des_rounds - 1 ? '(' : 'unpack("N*",') . ' list($l, $r) = array($r, $l);
($shuffle[$invipmap[($l >> 24) & 0xFF]] & "\x80\x80\x80\x80\x80\x80\x80\x80") | }
($shuffle[$invipmap[($r >> 24) & 0xFF]] & "\x40\x40\x40\x40\x40\x40\x40\x40") | list($l, $r) = array($r, $l);
($shuffle[$invipmap[($l >> 16) & 0xFF]] & "\x20\x20\x20\x20\x20\x20\x20\x20") |
($shuffle[$invipmap[($r >> 16) & 0xFF]] & "\x10\x10\x10\x10\x10\x10\x10\x10") |
($shuffle[$invipmap[($l >> 8) & 0xFF]] & "\x08\x08\x08\x08\x08\x08\x08\x08") |
($shuffle[$invipmap[($r >> 8) & 0xFF]] & "\x04\x04\x04\x04\x04\x04\x04\x04") |
($shuffle[$invipmap[ $l & 0xFF]] & "\x02\x02\x02\x02\x02\x02\x02\x02") |
($shuffle[$invipmap[ $r & 0xFF]] & "\x01\x01\x01\x01\x01\x01\x01\x01")
);
';
} }
// Perform the inverse IP permutation.
$crypt_block[$c].= '$in =
($shuffleinvip[($l >> 24) & 0xFF] & "\x80\x80\x80\x80\x80\x80\x80\x80") |
($shuffleinvip[($r >> 24) & 0xFF] & "\x40\x40\x40\x40\x40\x40\x40\x40") |
($shuffleinvip[($l >> 16) & 0xFF] & "\x20\x20\x20\x20\x20\x20\x20\x20") |
($shuffleinvip[($r >> 16) & 0xFF] & "\x10\x10\x10\x10\x10\x10\x10\x10") |
($shuffleinvip[($l >> 8) & 0xFF] & "\x08\x08\x08\x08\x08\x08\x08\x08") |
($shuffleinvip[($r >> 8) & 0xFF] & "\x04\x04\x04\x04\x04\x04\x04\x04") |
($shuffleinvip[ $l & 0xFF] & "\x02\x02\x02\x02\x02\x02\x02\x02") |
($shuffleinvip[ $r & 0xFF] & "\x01\x01\x01\x01\x01\x01\x01\x01");
';
} }
// Creates the inline-crypt function
$lambda_functions[$code_hash] = $this->createInlineCryptFunction( $lambda_functions[$code_hash] = $this->createInlineCryptFunction(
array( array(
'init_crypt' => $init_crypt, 'init_crypt' => $init_crypt,
@ -1489,6 +1522,8 @@ class Crypt_DES extends Crypt_Base {
) )
); );
} }
// Set the inline-crypt function as callback in: $this->inline_crypt
$this->inline_crypt = $lambda_functions[$code_hash]; $this->inline_crypt = $lambda_functions[$code_hash];
} }
} }

View File

@ -804,10 +804,8 @@ class Crypt_Twofish extends Crypt_Base {
break; break;
default: default:
$K = array(); $K = array();
$Kin = '';
for ($i = 0; $i < 40; ++$i) { for ($i = 0; $i < 40; ++$i) {
$K[] = '$K_' . $i; $K[] = '$K_' . $i;
$Kin.= '$K_' . $i . ' = $self->K[' . $i . '];' . "\n";
} }
$init_crypt = ' $init_crypt = '
@ -815,7 +813,7 @@ class Crypt_Twofish extends Crypt_Base {
$S1 = $self->S1; $S1 = $self->S1;
$S2 = $self->S2; $S2 = $self->S2;
$S3 = $self->S3; $S3 = $self->S3;
' . $Kin . ' list(' . implode(',', $K) . ') = $self->K;
'; ';
} }