/**
 * A look-up utility for converting between DNA sequences and
 * amino acids. Hard-coded, so there is no overhead (or benefit)
 * from the Java Collections interfaces/libraries.

 * <p>
 * I wrote this for fun and to show computer science folks how
 * simple the underpinnings of the genetic code are. Compiles to
 * about 2,800 bytes. Uses an int array to link the two String
 * arrays. Also for fun, I included the ability to list all the
 * amino acids whose codons start with a single given DNA base.

 * <p>
 * No threading issues, as no variables are changed (outside the
 * constructor), but you might want to rethink the Singleton
 * design, especially if you want to improve performance on
 * multi-processor platforms. As for potential improvements to
 * run-time performance, one could use knowledge of the frequency
 * of each amino acid to re-order the list of amino acids and
 * thereby shorten the look-up times. The same goes for the DNA
 * codons. (I did not take time to look at either of these.)
 * Whether the code produced by the Java JIT compiler is as good as
 * what one could get with C or assembler code for the same thing
 * also seems an interesting question.

 * <p>
 * Code like this suggests other questions in this area. For
 * example, how many different DNA sequences would code for a known
 * protein of, say, 200 amino acids? And if someone patents a DNA
 * sequence, well, let's just use code like this to produce
 * millions of unpatented sequences that produce the same protein. ;)

 * <p>
 * A good primer on DNA and protein synthesis and an interesting
 * table of amino acids were linked from here; the link targets are
 * missing from this copy of the file.
 * <p>
 * Freeware: not warranted in any way as to fitness for a particular
 * purpose, implied or not. Use at your own risk.
 *
* @version 1.0 2005/03/23
* @author Tony Dahlman
*/
public class TwoWayLookUp {

    /**
     * The single shared instance. Stays {@code null} until the first call
     * to {@link #getInstance()} (lazy singleton pattern).
     */
    private static TwoWayLookUp instance = null;

    /**
     * All the amino acids as strings: twenty three-letter abbreviations
     * followed by the pseudo-entry "STOP" (21 entries in total). The
     * position of an entry in this array is the amino-acid index used by
     * {@link #code}, {@link #vars} and {@link #decode}.
     */
    protected static final String[] aminoAcids = {
        "Phe", "Leu", "Ile", "Met", "Val", "Ser", "Pro",
        "Thr", "Ala", "Tyr", "His", "Gln", "Asn", "Lys",
        "Asp", "Glu", "Cys", "Trp", "Arg", "Gly", "STOP" };

    /**
     * All the DNA bases as single characters. The order T, C, A, G
     * determines the order in which the constructor generates
     * {@link #triplets}.
     */
    protected static final char[] bases = { 'T', 'C', 'A', 'G' };

    /**
     * DNA triplets or codons. Initialized in the constructor, but
     * you could hard code this if you're really serious.
     * Until an instance has been constructed this is {@code null};
     * afterwards it holds 64 strings, where the triplet
     * {@code bases[i], bases[j], bases[k]} sits at index
     * {@code 16*i + 4*j + k}.
     */
    protected static String[] triplets = null;

    /**
     * The DNA translation code, associating triplets with amino acids.
     * It has 64 entries, each an index into {@link #aminoAcids}; entry
     * {@code n} belongs to the triplet at position {@code n} of
     * {@link #triplets} (so each row of 16 below shares a first base).
     */
    protected static final int[] code = {
        // first base T
        0, 0, 1, 1, 5, 5, 5, 5, 9, 9, 20, 20, 16, 16, 20, 17,
        // first base C
        1, 1, 1, 1, 6, 6, 6, 6, 10, 10, 11, 11, 18, 18, 18, 18,
        // first base A
        2, 2, 2, 3, 7, 7, 7, 7, 12, 12, 13, 13, 5, 5, 18, 18,
        // first base G
        4, 4, 4, 4, 8, 8, 8, 8, 14, 14, 15, 15, 19, 19, 19, 19 };

    /**
     * The number of triplets for each amino acid (or "stop"), indexed
     * like {@link #aminoAcids}. {@link #getPossibles(int)} uses these
     * values to size its result arrays, so each entry must equal the
     * number of times that index occurs in {@link #code}.
     */
    protected static final int[] vars = {
        2, 6, 3, 1, 4, 6, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 1, 6, 4, 3 };

    /**
     * An array for finding DNA triplets from an amino acid.
     * This one is also initialized by the constructor but
     * could be hard coded (if you're really serious).
     * After construction, {@code decode[a]} lists the positions in
     * {@link #code} whose value is {@code a}; before that, all 21 rows
     * are {@code null}.
     */
    protected static int[][] decode = new int[21][];

    /**
     * Private constructor for the singleton. Just initializes the
     * triplets array, and sets up the array of arrays for back
     * conversion from amino acid to DNA.
     * Note that both tables it writes are static fields, not instance
     * fields.
     */
    private TwoWayLookUp() {
        triplets = new String[64];
        int dex = 0;
        // Enumerate all 4*4*4 base combinations; the first base varies slowest.
        for( int i=0; i<4; i++ )
            for( int j=0; j<4; j++ )
                for( int k=0; k<4; k++ ) {
                    char[] chars = new char[3];
                    chars[0] = bases[i];
                    chars[1] = bases[j];
                    chars[2] = bases[k];
                    triplets[dex++] = new String( chars );
                }
        // Build the reverse table: one row of code positions per amino acid.
        for( int i=0; i<21; i++ )
            decode[i] = getPossibles( i );
    }

    /**
     * Initialize this class as a singleton: creates the instance on the
     * first call and returns the same object on later calls.
     * <p>
     * NOTE(review): this method is not synchronized, and the constructor
     * fills the static {@link #triplets} and {@link #decode} tables. If
     * several threads can make the first call at the same time, confirm
     * that this is acceptable or add synchronization.
     *
     * @return the shared {@code TwoWayLookUp} instance
     */
    public static TwoWayLookUp getInstance() {
        if( instance == null )
            instance = new TwoWayLookUp();
        return instance;
    }

    /**
     * Given 1, 2, or 3 ordered DNA bases, list all amino acids
     * that are encoded.
     * A shorter input is completed with every possible combination of
     * trailing bases, so the same amino acid may appear more than once
     * in the result.
     * <p>
     * NOTE(review): the value returned by
     * {@code getAminoAcidFromTriplet} is used here as an index into
     * {@link #code}. That method is not visible in this part of the
     * file; presumably it returns the triplet's position in
     * {@link #triplets} - confirm, including what it returns for
     * characters outside {@link #bases}.
     *
     * @param input the leading one to three bases of a codon; must not
     *        be {@code null}, since its length is read unconditionally
     * @return 16 names for a one-base input (the name for
     *         {@code input + bases[i] + bases[j]} is at index
     *         {@code 4*i + j}), 4 names for a two-base input (index
     *         {@code i} for {@code input + bases[i]}), 1 name for a full
     *         triplet, or {@code null} when the input is empty or longer
     *         than three characters
     */
    public String[] getAcidsFromDNA( String input ) {
        String[] result = null;
        int len = input.length();
        // Holds the look-up result for the triplet currently being translated.
        int a = 0;
        switch( len ) {
            case 0 : return null;
            case 1 :
                // One base given: try all 16 combinations of second and third base.
                result = new String[16];
                for( int i=0; i<4; i++ )
                    for( int j=0; j<4; j++ ) {
                        a = getAminoAcidFromTriplet( input + bases[i] + bases[j] );
                        result[4*i + j] = aminoAcids[ code[a] ];
                    }
                return result;
            case 2 :
                // Two bases given: try each of the 4 possible third bases.
                result = new String[4];
                for( int i=0; i<4; i++ ) {
                    a = getAminoAcidFromTriplet( input + bases[i] );
                    result[i] = aminoAcids[ code[a] ];
                }
                return result;
            case 3 :
                // Complete triplet: exactly one translation.
                result = new String[1];
                a = getAminoAcidFromTriplet( input );
                result[0] = aminoAcids[ code[a] ];
                return result;
            default : return null;
        }
    }

    /**
     * Used in converting from Amino Acid to DNA. Scans {@link #code} and
     * collects, in ascending order, every position whose value equals the
     * given amino-acid index.
     *
     * @param i index of the amino acid in {@link #aminoAcids}
     * @return the matching positions in {@link #code}; the array has
     *         length {@code vars[i]}
     */
    protected int[] getPossibles( int i ) {
        int dex = 0;
        // Sized from the vars table, so vars[i] must match the number of hits below.
        int[] result = new int[ vars[i] ];
        for( int k=0; k<64; k++ )
            if( code[k] == i )
                result[dex++] = k;
        return result;
    }

    /**
     * Returns a list of DNA triplets given an amino
     * acid. Use the abbreviations, e.g., Phe, Ala, STOP.
     *
     * @param s the amino-acid abbreviation; it is compared with
     *        {@code equals} against the entries of {@link #aminoAcids},
     *        so it must match exactly, including case
     */
    public String[] getDNAFromAminoAcid( String s ) {
        String[] result = null;
        for( int dex=0; dex<21; dex++ )
            if( s.equals( aminoAcids[dex] ) ) {
                int[] dna = decode[ dex ];
                result = new String[ dna.length ];
                for( int i=0; i