import java.util.HashSet; public class CollGenerator { protected final HashFunc hashFunc; public CollGenerator(HashFunc hashFunc) { this.hashFunc = hashFunc; } public static void main(String[] args) { // final int TARGET_HASH_CODE = 0xBEEF; // or (1 << 20) final int TARGET_HASH_CODE = 0xFFFF; // or (1 << 20) // final int TARGET_HASH_CODE = (1 << 20); final int COLLISIONS_TO_GENERATE = 20; // first, Java default (seed=0, mult=31) // new CollGenerator(new MultPlusHashFunc(0, 31)) // new CollGenerator(new MultPlusHashFunc(0x77654321, 31)) new CollGenerator(new MultPlusHashFunc(0, 31)) .generate3(TARGET_HASH_CODE, COLLISIONS_TO_GENERATE, false); /* // then alternative, djb2 (see [http://www.cse.yorku.ca/~oz/hash.html]), // (see=5381, mult=33) new CollGenerator(new MultPlusHashFunc(5381, 33)) .generate3(1<<20, COLLISIONS_TO_GENERATE, false); new CollGenerator(new MultXorHashFunc(5381, 33)) .generate3(TARGET_HASH_CODE, COLLISIONS_TO_GENERATE, false); // one more, "sdbm" (from [http://www.cse.yorku.ca/~oz/hash.html] as well) new CollGenerator(new MultPlusHashFunc(5381, 65599)) .generate3(0xff0000, COLLISIONS_TO_GENERATE, false); new CollGenerator(new MultXorHashFunc(0, 65599)) .generate3(TARGET_HASH_CODE, COLLISIONS_TO_GENERATE, false); */ } /** * @param targetHash the hash code of the generated strings */ public void generate3(int targetHash, int maxEntries, boolean isStdHash) { final char minChar = 0x21; // after space // final int maxChar = Character.MAX_VALUE; final int maxChar = 0x7f; System.out.println("// target hash=0x"+Integer.toHexString(targetHash)+", with-> "+hashFunc); System.out.println("final static String[] COLLISIONS = {"); final HashFunc modHashFunc = hashFunc.withSeed(hashFunc.getSeed() + 0x7FFF); int count = 0; StringBuilder sb = new StringBuilder(); // first try simple analytic solutions... final HashSet<String> found = new HashSet<String>(); main_loop: for (int c0 = minChar; c0 <= maxChar; ++c0) { for (int c1 = minChar; c1 <= maxChar; ++c1) { // first, see if there's an "easy solution" int c2 = hashFunc.findLastChar(c0, c1, targetHash); // if (c2 < 0 || c2 > 0xFFFF) { if (c2 < minChar || c2 > maxChar) { continue; } String key = new String(new char[] { (char) c0, (char) c1, (char) c2 } ); // double-check for fun: if (isStdHash) { if (key.hashCode() != targetHash) { throw new RuntimeException("Should get STD hash of 0x"+Integer.toHexString(targetHash) +" for "+asQuoted(key)+"; instead got 0x"+Integer.toHexString(key.hashCode())); } } else { int actual = hashFunc.hashCode(key.charAt(0), key.charAt(1), key.charAt(2)); if (actual != targetHash) { throw new RuntimeException("Should get hash of 0x"+Integer.toHexString(targetHash) +" for "+asQuoted(key)+"; instead got 0x"+Integer.toHexString(actual)); } } found.add(key); sb.append(asQuoted(key)); // also, indicate alternate hash int altHash = modHashFunc.hashCode(key); sb.append("/*0x").append(Integer.toHexString(altHash)).append("*/"); if (++count >= maxEntries) { break main_loop; } sb.append(", "); if (sb.length() > 72) { System.out.println(sb.toString()); sb.setLength(0); } } } System.out.println(sb.toString()); // enough? if (found.size() < maxEntries) { System.out.println(" // not enough easy entries found... have to work harder?"); } System.out.println("};"); } private final static char[] HEX = "0123456789ABCDEF".toCharArray(); private String asQuoted(String s) { StringBuilder result = new StringBuilder().append('"'); for (int i = 0; i < s.length(); ++i) { char c = s.charAt(i); if (c == '"' || c == '\\') { result.append('\\'); result.append(c); } else if (c < 32 || c > 127) { result.append('\\').append('u'); result.append(HEX[(c >> 12)]); result.append(HEX[(c >> 8) & 0xF]); result.append(HEX[(c >> 4) & 0xF]); result.append(HEX[c & 0xF]); } else { result.append(c); } } return result.append('"').toString(); } abstract static class HashFunc { protected final int seed; protected HashFunc(int s) { seed = s; } public abstract HashFunc withSeed(int s); public int getSeed() { return seed; } public final int hashCode(String key) { return hashCode(key.charAt(0), key.charAt(1), key.charAt(2)); } public abstract int hashCode(int c0, int c1, int c2); public abstract int findLastChar(int c0, int c1, int targetHash); } abstract static class MultHashFunc extends HashFunc { protected final int multiplier; protected final int base; protected MultHashFunc(int s, int mult) { super(s); multiplier = mult; base = (s * mult); } } final static class MultPlusHashFunc extends MultHashFunc { protected final boolean multiplySeed; public MultPlusHashFunc(int s, int mult) { this(s, mult, false); } public MultPlusHashFunc(int s, int mult, boolean multSeed) { super(s, mult); multiplySeed = multSeed; } @Override public MultPlusHashFunc withSeed(int newSeed) { if (newSeed == seed) throw new IllegalArgumentException("Should not re-create with same seed"); return new MultPlusHashFunc(newSeed, multiplier, true); } @Override public int hashCode(int c0, int c1, int c2) { int h1 = multiplySeed ? (base * c0) : (base + c0); int h2 = (h1 * multiplier) + c1; return (h2 * multiplier) + c2; } @Override public int findLastChar(int c0, int c1, int targetHash) { int afterC1 = ((base + c0) * multiplier) + c1; int afterC1Mult = afterC1 * multiplier; // we know that 'hash = (afterC1 * MULT) + c2', // so ignoring overflow, easy solution would be // 'c2 = targetHash - (afterC1 * MULT)' if (afterC1Mult >= 0) { return targetHash - afterC1Mult; } // otherwise there's overflow; simple enough... return targetHash - (afterC1 * multiplier); } @Override public String toString() { return "seed: "+seed+", multiplier: "+multiplier+" (0x" +Integer.toHexString(multiplier)+", operation: +)"; } } final static class MultXorHashFunc extends MultHashFunc { public MultXorHashFunc(int s, int mult) { super(s, mult); } @Override public MultXorHashFunc withSeed(int newSeed) { if (newSeed == seed) throw new IllegalArgumentException("Should not re-create with same seed"); return new MultXorHashFunc(newSeed, multiplier); } @Override public int hashCode(int c0, int c1, int c2) { int h1 = base + c0; int h2 = (h1 * multiplier) ^ c1; return (h2 * multiplier) ^ c2; } @Override public int findLastChar(int c0, int c1, int targetHash) { int afterC1 = ((base + c0) * multiplier) ^ c1; // we know that 'hash = (afterC1 * MULT) ^ c2', // so ignoring overflow, easy solution would be // 'c2 = targetHash ^ (afterC1 * MULT)' return targetHash ^ (afterC1 * multiplier); } @Override public String toString() { return "seed: "+seed+", multiplier: "+multiplier+" (0x" +Integer.toHexString(multiplier)+", operation: ^)"; } } }