/**
* A look-up utility for converting to and from DNA sequences and
* amino acids. Hard-coded so there is no overhead (or benefit)
* due to the Java Collections interface/libraries.
*
* I wrote this for fun and to demonstrate for computer science
* folks how simple the underpinnings of the genetic code are.
* Compiles to c. 2,800 bytes. Uses an int array to link the
* two String arrays. Also for fun, I included the ability to
* list all the amino acids whose codons start with just one
* (or two) given DNA bases.
*
* No threading issues as no variables are changed (outside the
* constructor), but you might want to rethink the Singleton
* design, especially if you want to improve performance on multiple
* processor platforms. Thinking of potential improvements to run-time
* performance, one could use knowledge of the frequency of each
* amino acid to re-order the list of amino acids and thereby shorten
* the look-up times. Same for the DNA codons. (I did not take time
* to look at either of these.) And the issue of whether the code
* produced by the Java JIT compiler is as good as what one could get
* with C or assembler code for the same thing seems interesting.
*
* Other questions in this area are suggested by code like this. Like,
* how many different DNA sequences there are which would code for
* a known protein of say, 200 amino acids. And, if someone patents
* a DNA sequence, well, let's just use code like this to produce
* millions of unpatented sequences that produce the same protein. ;)
*
* The original version of this comment linked to a good primer on
* DNA and protein synthesis and to an interesting table of amino
* acids; the link targets are not preserved in this copy.
*
* Freeware: not warranted in any way as to fitness for a particular
* purpose, implied or not. Use at your own risk.
* @version 1.0 2005/03/23
* @author Tony Dahlman
*/
public class TwoWayLookUp {
/**
* The single shared instance (lazy singleton pattern). Starts out
* null and is assigned by getInstance() the first time that method
* finds it null; use getInstance() rather than reading this field.
*/
private static TwoWayLookUp instance = null;
/**
* All the amino acids as strings: the three-letter abbreviations of
* the 20 amino acids followed by the pseudo-entry "STOP", 21 entries
* in all. The position of a name in this array is the value stored
* for it in the code table and the index used for it in the vars and
* decode tables.
*/
protected static final String[] aminoAcids = {
"Phe", "Leu", "Ile", "Met", "Val", "Ser", "Pro",
"Thr", "Ala", "Tyr", "His", "Gln", "Asn", "Lys",
"Asp", "Glu", "Cys", "Trp", "Arg", "Gly", "STOP" };
/**
* All the DNA bases as single characters. The T, C, A, G order
* used here determines the order in which the constructor
* generates the triplets array.
*/
protected static final char[] bases = { 'T', 'C', 'A', 'G' };
/**
* DNA triplets or codons. Null until the constructor runs; the
* constructor fills it with all 64 three-base strings built from
* the bases array, from "TTT", "TTC", "TTA", "TTG", "TCT", ...
* through "GGG" (last base varying fastest). You could hard code
* this if you're really serious.
*/
protected static String[] triplets = null;
/**
* The DNA translation code, associating triplets with amino acids.
* Each of the 64 entries is an index into the aminoAcids array
* (20 is the "STOP" entry).
*
* The row comments below assume that entry k belongs to
* triplets[k], i.e. codons in T, C, A, G order with the last base
* varying fastest; "x" stands for T, C, A, G in turn.
* NOTE(review): confirm against the look-up method that reads this
* table by codon.
*/
protected static final int[] code = {
0, 0, 1, 1, 5, 5, 5, 5, // TTx, TCx
9, 9, 20, 20, 16, 16, 20, 17, // TAx, TGx
1, 1, 1, 1, 6, 6, 6, 6, // CTx, CCx
10, 10, 11, 11, 18, 18, 18, 18, // CAx, CGx
2, 2, 2, 3, 7, 7, 7, 7, // ATx, ACx
12, 12, 13, 13, 5, 5, 18, 18, // AAx, AGx
4, 4, 4, 4, 8, 8, 8, 8, 14, 14, // GTx, GCx, GAT, GAC
15, 15, 19, 19, 19, 19 }; // GAA, GAG, GGx
/**
* The number of triplets for each amino acid (or "stop"), listed
* in the same order as the aminoAcids array. getPossibles() uses
* these counts to size the arrays it returns, so each entry should
* equal the number of times that amino acid's index occurs in the
* code table.
*/
protected static final int[] vars = {
2, 6, 3, 1, 4, 6, 4,
4, 4, 2, 2, 2, 2, 2,
2, 2, 2, 1, 6, 4, 3 };
/**
* An array for finding DNA triplets from an amino acid. Row i
* (indexed like the aminoAcids array) holds the positions in the
* code table whose entry is i. The 21 rows are empty until the
* constructor fills them using getPossibles(); this one could
* also be hard coded (if you're really serious).
*/
protected static int[][] decode = new int[21][];
/**
* Builds the look-up tables. Private, so instances can only be
* created from inside this class.
*
* First fills the triplets array with every three-base string in
* T, C, A, G order (last base varying fastest), then stores in the
* decode array the code-table positions that getPossibles() reports
* for each of the 21 amino-acid slots.
*/
private TwoWayLookUp() {
    triplets = new String[64];
    // Position n is read as a three-digit base-4 number: one digit per base.
    for( int n=0; n<64; n++ ) {
        char[] codon = { bases[n / 16], bases[(n / 4) % 4], bases[n % 4] };
        triplets[n] = new String( codon );
    }
    int acid = 0;
    while( acid < 21 ) {
        decode[acid] = getPossibles( acid );
        acid++;
    }
}
/**
* Returns the single shared instance, creating it on first use.
*
* The method is synchronized because creation is lazy and the
* constructor fills the static triplets and decode tables. Without
* the lock, two threads calling this at the same time could both
* construct an instance, or one could get the instance while the
* tables are still being written.
*
* @return the one TwoWayLookUp instance, never null
*/
public static synchronized TwoWayLookUp getInstance() {
    if( instance == null ) {
        instance = new TwoWayLookUp();
    }
    return instance;
}
/**
* Given 1, 2, or 3 ordered DNA bases, lists all amino acids that
* are encoded by triplets starting with those bases.
*
* Every way of completing the input to a full triplet (missing
* bases taken in the order of the bases array) is passed to
* getAminoAcidFromTriplet(). The value it returns is used as an
* index into the code table, and that entry selects the name from
* the aminoAcids array.
*
* @param input the leading 1, 2, or 3 bases of a codon
* @return 16 names for one base, 4 names for two bases, 1 name for
*         three bases (repeated names are kept); null if input is
*         empty or longer than three characters
* @throws NullPointerException if input is null
*/
public String[] getAcidsFromDNA( String input ) {
    final int given = input.length();
    if( given < 1 || given > 3 ) {
        return null;
    }

    if( given == 3 ) {
        // Already a full triplet: exactly one answer.
        int hit = getAminoAcidFromTriplet( input );
        return new String[] { aminoAcids[ code[hit] ] };
    }

    if( given == 2 ) {
        // Only the last base is open: four candidates.
        String[] pairResult = new String[4];
        for( int last=0; last<4; last++ ) {
            String codon = input + bases[last];
            int hit = getAminoAcidFromTriplet( codon );
            pairResult[last] = aminoAcids[ code[hit] ];
        }
        return pairResult;
    }

    // One base given: the second and third bases are both open.
    // slot / 4 picks the second base, slot % 4 the third.
    String[] singleResult = new String[16];
    for( int slot=0; slot<16; slot++ ) {
        String codon = input + bases[slot / 4] + bases[slot % 4];
        int hit = getAminoAcidFromTriplet( codon );
        singleResult[slot] = aminoAcids[ code[hit] ];
    }
    return singleResult;
}
/**
* Collects the positions in the code table that hold a given
* amino-acid index. Used in converting from Amino Acid to DNA.
*
* @param i index of the amino acid, as used by the vars array
* @return the matching code-table positions in ascending order,
*         in an array whose length is vars[i]
*/
protected int[] getPossibles( int i ) {
    int[] hits = new int[ vars[i] ];
    int filled = 0;
    int pos = 0;
    while( pos < 64 ) {
        if( i == code[pos] ) {
            hits[filled] = pos;
            filled++;
        }
        pos++;
    }
    return hits;
}
/**
* Returns a list of DNA triplets given an amino
* acid. Use the abbreviations, e.g., Phe, Ala, STOP.
*/
public String[] getDNAFromAminoAcid( String s ) {
String[] result = null;
for( int dex=0; dex<21; dex++ )
if( s.equals( aminoAcids[dex] ) ) {
int[] dna = decode[ dex ];
result = new String[ dna.length ];
for( int i=0; i