Published on 13 Dec 2010. Tagged with php, algorithmicadvent.
Most people should be familiar with the Caesar cipher, a single alphabet substitution cipher. It is very easy to crack, even by hand. Writing an application to do this is a little harder, though. Here's my attempt at it.
/**
* A tool to crack Caesar cyphers
*
* - http://en.wikipedia.org/wiki/Caesar_cipher
*
* This article on language detection might come in handy at some point in the
* future:
*
* - http://phpir.com/language-detection-with-n-grams
*
* @version 2010-Dec-13
* @author Marc Ermshaus <http://www.ermshaus.org/>
* @license GNU General Public License (version 3 or later)
* <http://www.gnu.org/licenses/gpl.html>
*/
class CaesarCracker
{
/**
* Taken from http://de.wikipedia.org/wiki/Buchstabenhäufigkeit
*
* Here's the same thing for English:
* http://en.wikipedia.org/wiki/Letter_frequency
* #Relative_frequencies_of_letters_in_the_English_language
*
* @var Relative frequency of letters in the German language
*/
protected $probabilityMap = array(
'a' => 0.0651,
'b' => 0.0189,
'c' => 0.0306,
'd' => 0.0508,
'e' => 0.1740,
'f' => 0.0166,
'g' => 0.0301,
'h' => 0.0476,
'i' => 0.0755,
'j' => 0.027,
'k' => 0.0121,
'l' => 0.0344,
'm' => 0.0253,
'n' => 0.0251,
'o' => 0.0978,
'p' => 0.0079,
'q' => 0.0002,
'r' => 0.0700,
's' => 0.0727,
't' => 0.0615,
'u' => 0.0435,
'v' => 0.0067,
'w' => 0.0189,
'x' => 0.0003,
'y' => 0.0004,
'z' => 0.0113,
);
/**
*
*/
public function __construct()
{
arsort($this->probabilityMap);
}
/**
* Calculates a table of relative letter frequencies for the input
*
* @param string $input Data to create table for
* @return array Letter distribution table
*/
protected function createDistributionTable($input)
{
$tmp = array();
$l = strlen($input);
$realLength = 0;
$charlist = range('a', 'z');
for ($i = 0; $i < $l; $i++) {
$c = substr($input, $i, 1);
if (!in_array($c, $charlist)) {
continue;
}
$realLength++;
if (isset($tmp[$c])) {
$tmp[$c]++;
} else {
$tmp[$c] = 1;
}
}
foreach ($tmp as $char => &$amount) {
$amount /= $realLength;
}
unset($amount);
arsort($tmp);
return $tmp;
}
/**
* Calculates by which amount the passed test distribution differs from the
* expected distribution
*
* @param array $dt Test distribution
* @return float Amount of "badness". Lower values are *better*
*/
protected function calculateWeightedBadness(array $dt)
{
$pm = $this->probabilityMap;
$badness = 0;
$i = 0;
foreach ($dt as $char => $probability) {
$badness += abs($pm[$char] - $probability);
$i++;
}
return $badness;
}
/**
* Shifts the input by specified offset
*
* abcdefghijklmnopqrstuvwxyz shifted by 4 becomes
* efghijklmnopqrstuvwxyzabcd
*
* @param string $input String to encrypt
* @param int $offset Offset to shift by
* @return string Encrypted string
*/
public function caesarShift($input, $offset)
{
$ret = '';
$shiftingTable = implode(range('a', 'z')) . implode(range('a', 'z'));
$charlist = range('a', 'z');
$chars = str_split($input);
foreach ($chars as $char) {
if (in_array($char, $charlist)) {
$ret .= substr($shiftingTable,
strpos($shiftingTable, $char) + $offset, 1);
} else {
$ret .= $char;
}
}
return $ret;
}
/**
* Attempts to crack an enciphered string
*
* @param string $input The enciphered string (all lower-case)
* @return string Original string (hopefully)
*/
public function crack($input)
{
print_r($pm);
$badnesses = array();
$shiftingTable = implode(range('a', 'z')) . implode(range('a', 'z'));
$dt = $this->createDistributionTable($input);
for ($offset = 0; $offset <= 25; $offset++) {
$tmp = array();
foreach ($dt as $char => $probability) {
$tmp[substr($shiftingTable,
strpos($shiftingTable, $char) + $offset, 1)] = $probability;
}
$badnesses[$offset] = $this->calculateWeightedBadness($tmp);
}
asort($badnesses);
$keys = array_keys($badnesses);
return $this->caesarShift($input, $keys[0]);
}
}
// Test string (in German)
$input = 'Aufklaerung ist der Ausgang des Menschen aus seiner selbst'
. ' verschuldeten Unmuendigkeit.';
// Cracker works only with lower-case letters
$input = strtolower($input);
$c = new CaesarCracker();
// Shift input by a random offset
$shifted = $c->caesarShift($input, mt_rand(1, 13));
echo 'Shifted: ' . $shifted . '<br/>';
echo 'Cracked: ' . $c->crack($shifted);