Marc Ermshaus’ avatar

Marc Ermshaus

Linkblog

Algorithmic Advent: 13 – PHP Caesar cipher generator/cracker

Published on 13 Dec 2010. Tagged with php, algorithmicadvent.

Most people should be familiar with the Caesar cipher, a single alphabet substitution cipher. It is very easy to crack, even by hand. Writing an application to do this is a little harder, though. Here's my attempt at it.

/**
 * A tool to crack Caesar cyphers
 * 
 *   - http://en.wikipedia.org/wiki/Caesar_cipher
 *
 * This article on language detection might come in handy at some point in the
 * future:
 *
 *   - http://phpir.com/language-detection-with-n-grams
 *
 * @version 2010-Dec-13
 * @author Marc Ermshaus <http://www.ermshaus.org/>
 * @license GNU General Public License (version 3 or later)
 *          <http://www.gnu.org/licenses/gpl.html>
 */
class CaesarCracker
{
    /**
     * Taken from http://de.wikipedia.org/wiki/Buchstabenhäufigkeit
     * 
     * Here's the same thing for English:
     *     http://en.wikipedia.org/wiki/Letter_frequency
     *         #Relative_frequencies_of_letters_in_the_English_language
     *
     * @var Relative frequency of letters in the German language
     */
    protected $probabilityMap = array(
        'a' =>  0.0651,
        'b' =>  0.0189,
        'c' =>  0.0306,
        'd' =>  0.0508,
        'e' =>  0.1740,
        'f' =>  0.0166,
        'g' =>  0.0301,
        'h' =>  0.0476,
        'i' =>  0.0755,
        'j' =>  0.027,
        'k' =>  0.0121,
        'l' =>  0.0344,
        'm' =>  0.0253,
        'n' =>  0.0251,
        'o' =>  0.0978,
        'p' =>  0.0079,
        'q' =>  0.0002,
        'r' =>  0.0700,
        's' =>  0.0727,
        't' =>  0.0615,
        'u' =>  0.0435,
        'v' =>  0.0067,
        'w' =>  0.0189,
        'x' =>  0.0003,
        'y' =>  0.0004,
        'z' =>  0.0113,
    );

    /**
     * 
     */
    public function __construct()
    {
        arsort($this->probabilityMap);
    }

    /**
     * Calculates a table of relative letter frequencies for the input
     *
     * @param string $input Data to create table for
     * @return array Letter distribution table
     */
    protected function createDistributionTable($input)
    {
        $tmp = array();

        $l = strlen($input);
        $realLength = 0;

        $charlist = range('a', 'z');

        for ($i = 0; $i < $l; $i++) {
            $c = substr($input, $i, 1);

            if (!in_array($c, $charlist)) {
                continue;
            }

            $realLength++;

            if (isset($tmp[$c])) {
                $tmp[$c]++;
            } else {
                $tmp[$c] = 1;
            }
        }

        foreach ($tmp as $char => &$amount) {
            $amount /= $realLength;
        }
        unset($amount);

        arsort($tmp);

        return $tmp;
    }

    /**
     * Calculates by which amount the passed test distribution differs from the
     * expected distribution
     *
     * @param array $dt Test distribution
     * @return float Amount of "badness". Lower values are *better*
     */
    protected function calculateWeightedBadness(array $dt)
    {
        $pm = $this->probabilityMap;
        $badness = 0;

        $i = 0;

        foreach ($dt as $char => $probability) {
            $badness += abs($pm[$char] - $probability);
            $i++;
        }

        return $badness;
    }

    /**
     * Shifts the input by specified offset
     *
     * abcdefghijklmnopqrstuvwxyz shifted by 4 becomes
     * efghijklmnopqrstuvwxyzabcd
     *
     * @param string $input String to encrypt
     * @param int $offset Offset to shift by
     * @return string Encrypted string
     */
    public function caesarShift($input, $offset)
    {
        $ret = '';

        $shiftingTable = implode(range('a', 'z')) . implode(range('a', 'z'));

        $charlist = range('a', 'z');

        $chars = str_split($input);

        foreach ($chars as $char) {

            if (in_array($char, $charlist)) {
                $ret .= substr($shiftingTable,
                    strpos($shiftingTable, $char) + $offset, 1);
            } else {
                $ret .= $char;
            }

            
        }

        return $ret;
    }

    /**
     * Attempts to crack an enciphered string
     *
     * @param string $input The enciphered string (all lower-case)
     * @return string Original string (hopefully)
     */
    public function crack($input)
    {
        print_r($pm);

        $badnesses = array();

        $shiftingTable = implode(range('a', 'z')) . implode(range('a', 'z'));

        $dt = $this->createDistributionTable($input);

        for ($offset = 0; $offset <= 25; $offset++) {
            $tmp = array();

            foreach ($dt as $char => $probability) {
                $tmp[substr($shiftingTable,
                    strpos($shiftingTable, $char) + $offset, 1)] = $probability;
            }

            $badnesses[$offset] = $this->calculateWeightedBadness($tmp);
        }

        asort($badnesses);

        $keys = array_keys($badnesses);

        return $this->caesarShift($input, $keys[0]);
    }
}



// Test string (in German)
$input = 'Aufklaerung ist der Ausgang des Menschen aus seiner selbst'
       . ' verschuldeten Unmuendigkeit.';

// Cracker works only with lower-case letters
$input = strtolower($input);

$c = new CaesarCracker();

// Shift input by a random offset
$shifted = $c->caesarShift($input, mt_rand(1, 13));

echo 'Shifted: ' . $shifted . '<br/>';
echo 'Cracked: ' . $c->crack($shifted);