/*
 * (c) 2012 by Thomas Arend, 2012/10/25
 *
 * Purpose:    Guess the radix for parseInt from the input
 * Assumption: Highest digit is used at least once
 * Input:      parseInt coded file
 * Output:     possible radix for decoding with parseInt
 * Return-Codes (the codes of all failed checks are added up):
 *   0 everything well
 *   1 input was not tidy
 *   2 with radix r the blank is not the lowest character
 *   4 opening ( not as much as closing )
 *   8 opening { not as much as closing }
 *
 * The Toolkit Blackhole codes a Java-Script
 * in the attribute values of a tag.
 * Every two characters are interpreted as an integer and
 * parsed with parseInt and fromCharCode into a new character.
 * The radix for parseInt is obfuscated in the calling script.
 * Because we don't want to reassemble the obfuscated script
 * we have to guess the radix from the input.
 *
 * We assume that the highest digit is used in the input.
 *
 * That the highest digit is not used has a very low probability in
 * a large javascript. The useful range for the radix is 16 to 36.
 *
 * $Id: $
 * $Log:$
 */

#include <stdlib.h>
#include <stdio.h>
#include <string>

#define MAXRADIX 36   /* largest radix parseInt accepts: digits 0-9 and a-z */
#define MAXCHAR  256  /* number of distinct byte values */

using namespace std;

/* The digit characters in ascending order of their digit value. */
char validdigits [MAXRADIX+1] = "0123456789abcdefghijklmnopqrstuvwxyz";

/* usedchar[c]: how often the byte value c occurred in the input. */
long usedchar [MAXCHAR] = { 0 } ;

/*
 * statistic[i][j]: how often a two-character pair occurred whose first
 * character has digit index i and whose second has digit index j.
 */
long statistic[MAXRADIX][MAXRADIX] = { 0 };

/*
 * Tell whether a byte value is acceptable in a tidy input file.
 *
 * Returns 1 for the digits '0'-'9', the lower-case letters 'a'-'z' and
 * the line feed (10), which is tolerated although it is not a digit;
 * returns 0 for every other value.
 */
int validdigit (int digit )
{
    if ('0' <= digit && digit <= '9')
        return 1;
    if ('a' <= digit && digit <= 'z')
        return 1;
    if (digit == 10)
        return 1;
    return 0;
}

/*
 * Map a digit character to its numeric value.
 *
 * Returns 0-9 for '0'-'9' and 10-35 for 'a'-'z'. Every other value,
 * including the line feed, yields the sentinel 255.
 */
int digitindex (int digit )
{
    if ('0' <= digit && digit <= '9')
        return (digit - '0');
    if ('a' <= digit && digit <= 'z')
        return (digit - 'a' + 10);
    return 255;
}

// Check if the input file consisted only of 0-9, a-z (and line feeds).
//
// Counts the distinct byte values that occurred in the input (according
// to usedchar) but are rejected by validdigit. Prints that count and
// returns 1 if there is at least one, otherwise returns 0.
int check_tidy_charset ()
{
    int isdirty = 0;

    // Start at 0 so that a NUL byte in the input is reported as well.
    for (int c = 0; c < MAXCHAR; c++)
    {
        if (!validdigit(c) && usedchar[c])
        {
            isdirty++;
        }
    }

    if (isdirty > 0)
    {
        printf ("Dirty characters %d\n", isdirty ) ;
        return 1;
    }
    return 0;
}

// If the code contains blanks then the ' '
// should be the
lowest cahracter. int blank_check ( int radix ) { int found = 0; int i = 0, j = 0; found = 0; for ( i = 0; i < MAXRADIX && !found ; i++ ) { for ( j = 0; j < MAXRADIX && !found ; j++ ) { found = statistic[i][j] > 0; } } if ((i-1)*radix + j-1 != ' ') { printf ("Blank check failed at [%d,%d] = %d\n", i-1 , j-1 , statistic[i-1][j-1]) ; return 2; } else return 0; } // The characters ( and ) should have equal counts. int partentheses_check ( int radix ) { if (statistic['(' / radix]['(' % radix] != statistic[')' / radix][')' % radix]) { printf ("Parentheses '()' check failed with %d,%d\n", statistic['(' / radix]['(' % radix] , statistic[')' / radix][')' % radix] ) ; return 4; } else return 0; } // The characters { and } should have equal counts. int curly_brace_check ( int radix ) { if (statistic['{' / radix]['{' % radix] != statistic['}' / radix]['}' % radix]) { printf ("Bracket '[]' check failed with %d,%d\n", statistic['{' / radix]['{' % radix] , statistic['}' / radix]['}' % radix] ) ; return 8; } else return 0; } int main ( int argc, char *argv[ ]) { int figure = 0; int previous = 0; int paired = 0; int radix = 0; int dirty = 0; int isdirty = 0; int found = 0; int i = 0, j = 0; int error = 0; // Count all characters paired = 0; while (( figure = getchar()) != EOF ) { usedchar[figure]++; if (paired) { i = digitindex(previous); j = digitindex(figure); if (i < 255 && j < 255) { statistic[i][j]++;} paired = 0; } else { paired = 1; previous = figure; } } // Seek highest character for ( figure = 255 ; ( figure > 0) && (usedchar[figure] == 0); figure-- ) {} // Print radix radix = digitindex (figure) + 1; printf ( "%d\n" , radix ); // Check input and guess error += check_tidy_charset(); error += blank_check(radix); error += partentheses_check(radix); error += curly_brace_check(radix); return error; }