25Jun/120
Smith-Waterman Algorithm Program And Source Code
Calculates the optimal alignment, distance matrices and the traceback for two given strings. Costs can be adjusted in the source, right now using a Blosum62 matrix.
Download: SmithWaterman (v. 1.0)Source
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Drawing;
namespace SmithWaterman
{
class SmithWaterman
{
private static int[,] matrix = {
{ 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0},
{-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3},
{-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3},
{-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3},
{ 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1},
{-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2},
{-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2},
{ 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3},
{-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3},
{-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3},
{-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1},
{-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2},
{-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1},
{-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1},
{-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2},
{ 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2},
{ 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0},
{-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3},
{-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1},
{ 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4}};
// quick and dirty equivalent of typesafe enum pattern, can also use HashMap
// or even better, EnumMap in Java 5.
// This code is for Java 1.4.2, so we will stick to the simple implementation
private static int getIndex(char a) {
// check for upper and lowercase characters
switch (char.ToUpper(a)) {
case 'A': return 0;
case 'R': return 1;
case 'N': return 2;
case 'D': return 3;
case 'C': return 4;
case 'Q': return 5;
case 'E': return 6;
case 'G': return 7;
case 'H': return 8;
case 'I': return 9;
case 'L': return 10;
case 'K': return 11;
case 'M': return 12;
case 'F': return 13;
case 'P': return 14;
case 'S': return 15;
case 'T': return 16;
case 'W': return 17;
case 'Y': return 18;
case 'V': return 19;
default: System.Windows.Forms.MessageBox.Show("Test"); return 0;
}
}
private const char NON_ALPHABETIC_CHARACTER1 = '§';
private const char NON_ALPHABETIC_CHARACTER2 = '$';
private enum Herkunft
{
KeineInformation,
Oben,
ObenLinks,
Links
}
private struct Alignment
{
public string Seq1 { get; set; }
public string Seq2 { get; set; }
}
private string SeqU = string.Empty, SeqV = string.Empty;
private int[,] Matrix;
private Herkunft[,] Herkunftsmatrix;
private List OptimaleAlignments;
public SmithWaterman(string sequenceU, string sequenceV)
{
SeqU = NON_ALPHABETIC_CHARACTER1 + sequenceU;
SeqV = NON_ALPHABETIC_CHARACTER2 + sequenceV;
this.Initialisieren();
this.MatrixBerechnen();
this.Backtrace();
}
private void Initialisieren()
{
Matrix = new int[SeqU.Length, SeqV.Length]; //Setzt praktischerweise gleichzeitig alles auf 0
Herkunftsmatrix = new Herkunft[SeqU.Length, SeqV.Length];
OptimaleAlignments = new List();
}
private void MatrixBerechnen()
{
int a, b, c;
for (int SeqUCounter = 1; SeqUCounter < SeqU.Length; SeqUCounter++)
{
for (int SeqVCounter = 1; SeqVCounter < SeqV.Length; SeqVCounter++)
{
a = 0; b = 0; c = 0;
a = Matrix[SeqUCounter - 1, SeqVCounter - 1] + Score(SeqU[SeqUCounter], SeqV[SeqVCounter]);
b = Matrix[SeqUCounter - 1, SeqVCounter] + Score(SeqU[SeqUCounter], '-');
c = Matrix[SeqUCounter, SeqVCounter - 1] + Score('-', SeqV[SeqVCounter]);
int max = this.Max(a, b, c);
if (max < 0)
{
max = 0;
}
if (max != 0)
{
if (max == a)
{ Herkunftsmatrix[SeqUCounter, SeqVCounter] = Herkunft.ObenLinks; }
if (max == b)
{ Herkunftsmatrix[SeqUCounter, SeqVCounter] = Herkunft.Links; }
if (max == c)
{ Herkunftsmatrix[SeqUCounter, SeqVCounter] = Herkunft.Oben; }
}
Matrix[SeqUCounter, SeqVCounter] = max;
}
}
}
private int Score(char uj, char vj)
{
if (uj != '-' && vj != '-')
{
//if (uj == vj)
//{
// return 1;
//}
//if (uj != vj)
//{
return matrix[getIndex(uj), getIndex(vj)];
//}
}
else
{
return -5;
}
throw new Exception("Unreachable code reached...what?");
}
private void Backtrace()
{
List HöchsteZahl = new List();
int tempHöchsteZahl = 0;
//Höchste Zahl ermitteln
for (int SeqUCounter = 1; SeqUCounter < SeqU.Length; SeqUCounter++)
{
for (int SeqVCounter = 1; SeqVCounter < SeqV.Length; SeqVCounter++) { if (Matrix[SeqUCounter, SeqVCounter] > tempHöchsteZahl)
{ tempHöchsteZahl = Matrix[SeqUCounter, SeqVCounter]; }
}
}
//Alle raussuchen
for (int SeqUCounter = 1; SeqUCounter < SeqU.Length; SeqUCounter++)
{
for (int SeqVCounter = 1; SeqVCounter < SeqV.Length; SeqVCounter++)
{
if (Matrix[SeqUCounter, SeqVCounter] == tempHöchsteZahl)
{ HöchsteZahl.Add(new Point(SeqUCounter, SeqVCounter)); }
}
}
for (int i = 0; i < HöchsteZahl.Count; i++)
{
Alignment tempAlignment = new Alignment();
int u = HöchsteZahl[i].X, v = HöchsteZahl[i].Y;
while (Matrix[u, v] != 0)
{
switch (Herkunftsmatrix[u,v])
{
case Herkunft.KeineInformation:
System.Windows.Forms.MessageBox.Show("Hummm");
break;
case Herkunft.Oben:
tempAlignment.Seq1 = '-' + tempAlignment.Seq1;
tempAlignment.Seq2 = SeqU[v] + tempAlignment.Seq2;
v--;
break;
case Herkunft.ObenLinks:
tempAlignment.Seq1 = SeqU[u] + tempAlignment.Seq1;
tempAlignment.Seq2 = SeqV[v] + tempAlignment.Seq2;
u--;v--;
break;
case Herkunft.Links:
tempAlignment.Seq1 = SeqU[u] + tempAlignment.Seq1;
tempAlignment.Seq2 = '-' + tempAlignment.Seq2;
u--;
break;
default:
break;
}
}
OptimaleAlignments.Add(tempAlignment);
}
}
private int Max(int a, int b, int c)
{
return Math.Max(a, Math.Max(b, c));
}
public string Print()
{
string tempString = string.Empty;
SeqU = SeqU.Replace(NON_ALPHABETIC_CHARACTER1, '§');
SeqV = SeqV.Replace(NON_ALPHABETIC_CHARACTER2, '§');
//Matrix
tempString += "Smith-Waterman Matrix\r\n";
tempString += "§\t";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
tempString += SeqU[SeqUCounter] + "\t";
}
tempString += "\r\n";
for (int SeqVCounter = 0; SeqVCounter < SeqV.Length; SeqVCounter++)
{
tempString += SeqV[SeqVCounter] + "\t";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
tempString += Matrix[SeqUCounter, SeqVCounter] + "\t";
}
tempString += "\r\n";
}
tempString += "\r\n";
//Optimale Alignments
tempString += "Optimale Alignments\r\n";
foreach (Alignment a in OptimaleAlignments)
{
foreach (char c in a.Seq1)
{ tempString += c + "\t"; }
tempString += "\r\n";
foreach (char c in a.Seq2)
{ tempString += c + "\t"; }
tempString += "\r\n";
tempString += "\r\n";
}
//Herkunftsmatrix
tempString += "Herkunftsmatrix\r\n";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
for (int SeqVCounter = 0; SeqVCounter < SeqV.Length; SeqVCounter++)
{
if (Herkunftsmatrix[SeqUCounter, SeqVCounter] != Herkunft.KeineInformation)
{
tempString += string.Format("[{0}][{1}] = [{2}]\r\n", SeqUCounter, SeqVCounter, Herkunftsmatrix[SeqUCounter, SeqVCounter]);
}
}
tempString += "\r\n";
}
return tempString;
}
}
}25Jun/120
Gotoh Algorithm Calculator Program And Source Code
Calculates the optimal alignment, distance matrices and the traceback for two given strings. Costs for starting and extending a gap can be modified.
Download: Gotoh Algorithm (v. 1.0)Source
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Gotoh_Algorithmus
{
public class Gotoh
{
private const char NON_ALPHABETIC_CHARACTER1 = '§'; //Pesudo Sigma für die erste Sequenz
private const char NON_ALPHABETIC_CHARACTER2 = '$'; //Pseudo Sigma für die zweite Sequenz
private const int PSEUDO_UNENDLICH = int.MaxValue - 100;
private int GAP_START = 1;
private int GAP_EXTEND = 1;
private const int MATCH = 0;
private const int MISMATCH = 1;
private string SeqU = string.Empty, SeqV = string.Empty; //Beiden Sequenzen
private int[,] S, H, V; //Die drei Matrizen
private Origin[,] HOrigin;
private Origin[,] VOrigin;
private Origin[,] SOrigin;
private enum Matrizen
{
MatrixS,
MatrixH,
MatrixV
}
private enum Origin
{
None,
MatrixS,
MatrixH,
MatrixV,
Up,
Left,
UpLeft
}
private struct Alignment
{
public string Seq1 { get; set; }
public string Seq2 { get; set; }
}
private Alignment OptimalesAlignment;
public Gotoh(string seqA, string seqB, int gapStart, int gapExtend)
{
//Sequenzen werden abgespeichert mit pseudo Sigma als Präfix
SeqU = NON_ALPHABETIC_CHARACTER1 + seqA;
SeqV = NON_ALPHABETIC_CHARACTER2 + seqB;
GAP_START = gapStart;
GAP_EXTEND = gapExtend;
this.Initialisieren();
this.MatrixBerechnen();
this.Backtrace();
}
private void Initialisieren()
{
OptimalesAlignment = new Alignment();
S = new int[SeqU.Length, SeqV.Length]; //Setzt praktischerweise gleichzeitig alles auf 0
H = new int[SeqU.Length, SeqV.Length]; //Setzt praktischerweise gleichzeitig alles auf 0
V = new int[SeqU.Length, SeqV.Length]; //Setzt praktischerweise gleichzeitig alles auf 0
HOrigin = new Origin[SeqU.Length, SeqV.Length];
VOrigin = new Origin[SeqU.Length, SeqV.Length];
SOrigin = new Origin[SeqU.Length, SeqV.Length];
for (int j = 0; j < SeqV.Length; j++)
{ V[0, j] = PSEUDO_UNENDLICH; } //Setzt V(0,j) auf pseudo -unendlich
for (int i = 1; i < SeqU.Length; i++)
{ S[i, 0] = V[i, 0] = GapFunktion(i); SOrigin[i, 0] = Origin.MatrixV; VOrigin[i, 0] = Origin.MatrixS; } //Setzt die GAP Kosten
for (int j = 1; j < SeqV.Length; j++)
{ S[0, j] = H[0, j] = GapFunktion(j); SOrigin[0, j] = Origin.MatrixH; HOrigin[0,j] = Origin.MatrixS; }//Setzt die GAP Kosten
for (int i = 0; i < SeqU.Length; i++)
{ H[i, 0] = PSEUDO_UNENDLICH; } //Setzt H(i,0) auf pseudo -unendlich
}
private void MatrixBerechnen()
{
int a, b, c; //Temporär die Ergebnisse speichern
for (int i = 1; i < SeqU.Length; i++)
{
for (int j = 1; j < SeqV.Length; j++)
{
a = 0; b = 0; c = 0;
//H(i,j)
a = S[i, j - 1] + GAP_START + GAP_EXTEND;
b = H[i, j - 1] + GAP_EXTEND;
H[i, j] = Min(a, b);
//Backtrace Matrix für H
if (H[i, j] == a)
{ HOrigin[i, j] = Origin.MatrixS; } //Matrix Wechseln
else
{ HOrigin[i, j] = Origin.Up; } //Hoch
a = 0; b = 0;
//V(i,j)
a = S[i - 1, j] + GAP_START + GAP_EXTEND;
b = V[i - 1, j] + GAP_EXTEND;
V[i, j] = Min(a, b);
//Backtrace Matrix für V
if (V[i, j] == a)
{ VOrigin[i, j] = Origin.MatrixS; } //Matrix Wechseln
else
{ VOrigin[i, j] = Origin.Left; } //Links
a = 0; b = 0;
//S(i,j)
a = S[i - 1, j - 1] + Score(SeqU[i], SeqV[j]);
b = H[i, j];
c = V[i, j];
S[i, j] = Min(a, b, c);
//Backtrace Matrix für S
if (S[i, j] == a)
{ SOrigin[i, j] = Origin.UpLeft; } //Matrix Wechseln
if (S[i, j] == b)
{ SOrigin[i, j] = Origin.MatrixH; } //Matrix Wechseln
if (S[i, j] == c)
{ SOrigin[i, j] = Origin.MatrixV; } //Matrix Wechseln
}
}
}
private int Score(char uj, char vj)
{
return uj == vj ? MATCH : MISMATCH;
}
private void Backtrace()
{
int u = SeqU.Length - 1, v = SeqV.Length - 1;
Matrizen AktuelleMatrix = Matrizen.MatrixS;
while (u != 0 || v != 0)
{
switch (AktuelleMatrix)
{
case Matrizen.MatrixS:
switch (SOrigin[u,v])
{
case Origin.UpLeft:
OptimalesAlignment.Seq1 = SeqU[u] + OptimalesAlignment.Seq1;
OptimalesAlignment.Seq2 = SeqV[v] + OptimalesAlignment.Seq2;
u--;v--;
break;
case Origin.MatrixH:
AktuelleMatrix = Matrizen.MatrixH;
break;
case Origin.MatrixV:
AktuelleMatrix = Matrizen.MatrixV;
break;
case Origin.Up:
case Origin.Left:
case Origin.MatrixS:
default:
break;
}
break;
case Matrizen.MatrixH:
switch (HOrigin[u,v])
{
case Origin.MatrixS:
OptimalesAlignment.Seq1 = '-' + OptimalesAlignment.Seq1;
OptimalesAlignment.Seq2 = SeqU[v] + OptimalesAlignment.Seq2;
v--;
AktuelleMatrix = Matrizen.MatrixS;
break;
case Origin.Up:
OptimalesAlignment.Seq1 = '-' + OptimalesAlignment.Seq1;
OptimalesAlignment.Seq2 = SeqU[v] + OptimalesAlignment.Seq2;
v--;
break;
case Origin.MatrixH:
case Origin.MatrixV:
case Origin.Left:
case Origin.UpLeft:
default:
break;
}
break;
case Matrizen.MatrixV:
switch (VOrigin[u,v])
{
case Origin.MatrixS:
OptimalesAlignment.Seq1 = SeqU[u] + OptimalesAlignment.Seq1;
OptimalesAlignment.Seq2 = '-' + OptimalesAlignment.Seq2;
u--;
AktuelleMatrix = Matrizen.MatrixS;
break;
case Origin.Left:
OptimalesAlignment.Seq1 = SeqU[u] + OptimalesAlignment.Seq1;
OptimalesAlignment.Seq2 = '-' + OptimalesAlignment.Seq2;
u--;
break;
case Origin.MatrixH:
case Origin.MatrixV:
case Origin.Up:
case Origin.UpLeft:
default:
break;
}
break;
default:
throw new DivideByZeroException("Trololol");
}
}
}
private static int Min(int a, int b)
{
return Math.Min(a, b);
}
private static int Min(int a, int b, int c)
{
return Math.Min(a, Math.Min(b, c));
}
private int GapFunktion(int position)
{ return GAP_START + position * GAP_EXTEND; }
public string Print()
{
string tempString = string.Empty;
SeqU = SeqU.Replace(NON_ALPHABETIC_CHARACTER1, '§');
SeqV = SeqV.Replace(NON_ALPHABETIC_CHARACTER2, '§');
//Optimale Alignments
tempString += "Optimale Alignments\r\n";
foreach (char c in OptimalesAlignment.Seq1)
{ tempString += c + "\t"; }
tempString += "\r\n";
foreach (char c in OptimalesAlignment.Seq2)
{ tempString += c + "\t"; }
tempString += "\r\n";
tempString += "\r\n";
//Matrizen
tempString += "S(i,j)\r\n";
tempString += "§\t";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
tempString += SeqU[SeqUCounter] + "\t";
}
tempString += "\r\n";
for (int SeqVCounter = 0; SeqVCounter < SeqV.Length; SeqVCounter++)
{
tempString += SeqV[SeqVCounter] + "\t";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
if (S[SeqUCounter, SeqVCounter] == PSEUDO_UNENDLICH)
{ tempString += "∞\t"; }
else
{
tempString += S[SeqUCounter, SeqVCounter] + "\t";
}
}
tempString += "\r\n";
}
tempString += "\r\n";
tempString += "H(i,j)\r\n";
tempString += "§\t";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
tempString += SeqU[SeqUCounter] + "\t";
}
tempString += "\r\n";
for (int SeqVCounter = 0; SeqVCounter < SeqV.Length; SeqVCounter++)
{
tempString += SeqV[SeqVCounter] + "\t";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
if (H[SeqUCounter, SeqVCounter] == PSEUDO_UNENDLICH)
{ tempString += "∞\t"; }
else
{
tempString += H[SeqUCounter, SeqVCounter] + "\t";
}
}
tempString += "\r\n";
}
tempString += "\r\n";
tempString += "V(i,j)\r\n";
tempString += "§\t";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
tempString += SeqU[SeqUCounter] + "\t";
}
tempString += "\r\n";
for (int SeqVCounter = 0; SeqVCounter < SeqV.Length; SeqVCounter++)
{
tempString += SeqV[SeqVCounter] + "\t";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
if (V[SeqUCounter, SeqVCounter] == PSEUDO_UNENDLICH)
{ tempString += "∞\t"; }
else
{
tempString += V[SeqUCounter, SeqVCounter] + "\t";
}
}
tempString += "\r\n";
}
tempString += "\r\n";
//Herkunftsmatrix
tempString += "Herkunftsmatrix\r\n";
tempString += "S(i,j)\r\n";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
for (int SeqVCounter = 0; SeqVCounter < SeqV.Length; SeqVCounter++)
{
if (SOrigin[SeqUCounter, SeqVCounter] != Origin.None)
{
tempString += string.Format("[{0}][{1}] = [{2}]\r\n", SeqUCounter, SeqVCounter, SOrigin[SeqUCounter, SeqVCounter]);
}
}
tempString += "\r\n";
}
tempString += "H(i,j)\r\n";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
for (int SeqVCounter = 0; SeqVCounter < SeqV.Length; SeqVCounter++)
{
if (HOrigin[SeqUCounter, SeqVCounter] != Origin.None)
{
tempString += string.Format("[{0}][{1}] = [{2}]\r\n", SeqUCounter, SeqVCounter, HOrigin[SeqUCounter, SeqVCounter]);
}
}
tempString += "\r\n";
}
tempString += "V(i,j)\r\n";
for (int SeqUCounter = 0; SeqUCounter < SeqU.Length; SeqUCounter++)
{
for (int SeqVCounter = 0; SeqVCounter < SeqV.Length; SeqVCounter++)
{
if (VOrigin[SeqUCounter, SeqVCounter] != Origin.None)
{
tempString += string.Format("[{0}][{1}] = [{2}]\r\n", SeqUCounter, SeqVCounter, VOrigin[SeqUCounter, SeqVCounter]);
}
}
tempString += "\r\n";
}
return tempString;
}
}
}5Dec/110
Knuth-Morris-Pratt Algorithm Program
Dieses Programm simuliert den Knuth-Morris-Pratt Algorithmus.
This program simulates the Knuth-Morris-Pratt algorithm.
Download: KMP & BM Algorithmus (v. 1.0)