umma.dev

Rosalind Problem Sets

TypeScript solutions to all 105 problems from the Rosalind bioinformatics problem set.

Shared Utilities

const CODON_TABLE: Record<string, string> = {
  UUU:'F',UUC:'F',UUA:'L',UUG:'L',CUU:'L',CUC:'L',CUA:'L',CUG:'L',
  AUU:'I',AUC:'I',AUA:'I',AUG:'M',GUU:'V',GUC:'V',GUA:'V',GUG:'V',
  UCU:'S',UCC:'S',UCA:'S',UCG:'S',CCU:'P',CCC:'P',CCA:'P',CCG:'P',
  ACU:'T',ACC:'T',ACA:'T',ACG:'T',GCU:'A',GCC:'A',GCA:'A',GCG:'A',
  UAU:'Y',UAC:'Y',UAA:'*',UAG:'*',CAU:'H',CAC:'H',CAA:'Q',CAG:'Q',
  AAU:'N',AAC:'N',AAA:'K',AAG:'K',GAU:'D',GAC:'D',GAA:'E',GAG:'E',
  UGU:'C',UGC:'C',UGA:'*',UGG:'W',CGU:'R',CGC:'R',CGA:'R',CGG:'R',
  AGU:'S',AGC:'S',AGA:'R',AGG:'R',GGU:'G',GGC:'G',GGA:'G',GGG:'G',
};

const MASS: Record<string, number> = {
  A:71.03711,C:103.00919,D:115.02694,E:129.04259,F:147.06841,
  G:57.02146, H:137.05891,I:113.08406,K:128.09496,L:113.08406,
  M:131.04049,N:114.04293,P:97.05276, Q:128.05858,R:156.10111,
  S:87.03203, T:101.04768,V:99.06841, W:186.07931,Y:163.06333,
};

// Full BLOSUM62 matrix (used by GLOB, GAFF, GCON, OAP, SMGB)
const BLOSUM62: Record<string, Record<string, number>> = {
  A:{A:4,R:-1,N:-2,D:-2,C:0,Q:-1,E:-1,G:0,H:-2,I:-1,L:-1,K:-1,M:-1,F:-2,P:-1,S:1,T:0,W:-3,Y:-2,V:0},
  R:{A:-1,R:5,N:0,D:-2,C:-3,Q:1,E:0,G:-2,H:0,I:-3,L:-2,K:2,M:-1,F:-3,P:-2,S:-1,T:-1,W:-3,Y:-2,V:-3},
  N:{A:-2,R:0,N:6,D:1,C:-3,Q:0,E:0,G:0,H:1,I:-3,L:-3,K:0,M:-2,F:-3,P:-2,S:1,T:0,W:-4,Y:-2,V:-3},
  D:{A:-2,R:-2,N:1,D:6,C:-3,Q:0,E:2,G:-1,H:-1,I:-3,L:-4,K:-1,M:-3,F:-3,P:-1,S:0,T:-1,W:-4,Y:-3,V:-3},
  C:{A:0,R:-3,N:-3,D:-3,C:9,Q:-3,E:-4,G:-3,H:-3,I:-1,L:-1,K:-3,M:-1,F:-2,P:-3,S:-1,T:-1,W:-2,Y:-2,V:-1},
  Q:{A:-1,R:1,N:0,D:0,C:-3,Q:5,E:2,G:-2,H:0,I:-3,L:-2,K:1,M:0,F:-3,P:-1,S:0,T:-1,W:-2,Y:-1,V:-2},
  E:{A:-1,R:0,N:0,D:2,C:-4,Q:2,E:5,G:-2,H:0,I:-3,L:-3,K:1,M:-2,F:-3,P:-1,S:0,T:-1,W:-3,Y:-2,V:-2},
  G:{A:0,R:-2,N:0,D:-1,C:-3,Q:-2,E:-2,G:6,H:-2,I:-4,L:-4,K:-2,M:-3,F:-3,P:-2,S:0,T:-2,W:-2,Y:-3,V:-3},
  H:{A:-2,R:0,N:1,D:-1,C:-3,Q:0,E:0,G:-2,H:8,I:-3,L:-3,K:-1,M:-2,F:-1,P:-2,S:-1,T:-2,W:-2,Y:2,V:-3},
  I:{A:-1,R:-3,N:-3,D:-3,C:-1,Q:-3,E:-3,G:-4,H:-3,I:4,L:2,K:-3,M:1,F:0,P:-3,S:-2,T:-1,W:-3,Y:-1,V:3},
  L:{A:-1,R:-2,N:-3,D:-4,C:-1,Q:-2,E:-3,G:-4,H:-3,I:2,L:4,K:-2,M:2,F:0,P:-3,S:-2,T:-1,W:-2,Y:-1,V:1},
  K:{A:-1,R:2,N:0,D:-1,C:-3,Q:1,E:1,G:-2,H:-1,I:-3,L:-2,K:5,M:-1,F:-3,P:-1,S:0,T:-1,W:-3,Y:-2,V:-2},
  M:{A:-1,R:-1,N:-2,D:-3,C:-1,Q:0,E:-2,G:-3,H:-2,I:1,L:2,K:-1,M:5,F:0,P:-2,S:-1,T:-1,W:-1,Y:-1,V:1},
  F:{A:-2,R:-3,N:-3,D:-3,C:-2,Q:-3,E:-3,G:-3,H:-1,I:0,L:0,K:-3,M:0,F:6,P:-4,S:-2,T:-2,W:1,Y:3,V:-1},
  P:{A:-1,R:-2,N:-2,D:-1,C:-3,Q:-1,E:-1,G:-2,H:-2,I:-3,L:-3,K:-1,M:-2,F:-4,P:7,S:-1,T:-1,W:-4,Y:-3,V:-2},
  S:{A:1,R:-1,N:1,D:0,C:-1,Q:0,E:0,G:0,H:-1,I:-2,L:-2,K:0,M:-1,F:-2,P:-1,S:4,T:1,W:-3,Y:-2,V:-2},
  T:{A:0,R:-1,N:0,D:-1,C:-1,Q:-1,E:-1,G:-2,H:-2,I:-1,L:-1,K:-1,M:-1,F:-2,P:-1,S:1,T:5,W:-2,Y:-2,V:0},
  W:{A:-3,R:-3,N:-4,D:-4,C:-2,Q:-2,E:-3,G:-2,H:-2,I:-3,L:-2,K:-3,M:-1,F:1,P:-4,S:-3,T:-2,W:11,Y:2,V:-3},
  Y:{A:-2,R:-2,N:-2,D:-3,C:-2,Q:-1,E:-2,G:-3,H:2,I:-1,L:-1,K:-2,M:-1,F:3,P:-3,S:-2,T:-2,W:2,Y:7,V:-1},
  V:{A:0,R:-3,N:-3,D:-3,C:-1,Q:-2,E:-2,G:-3,H:-3,I:3,L:1,K:-2,M:1,F:-1,P:-2,S:-2,T:0,W:-3,Y:-1,V:4},
};

// PAM250 matrix (used by LOCA, LAFF)
const PAM250: Record<string, Record<string, number>> = {
  A:{A:2,R:-2,N:0,D:0,C:-2,Q:0,E:0,G:1,H:-1,I:-1,L:-2,K:-1,M:-1,F:-3,P:1,S:1,T:1,W:-6,Y:-3,V:0},
  R:{A:-2,R:6,N:0,D:-1,C:-4,Q:1,E:-1,G:-3,H:2,I:-2,L:-3,K:3,M:0,F:-4,P:0,S:0,T:-1,W:2,Y:-4,V:-2},
  N:{A:0,R:0,N:2,D:2,C:-4,Q:1,E:1,G:0,H:2,I:-2,L:-3,K:1,M:-2,F:-3,P:0,S:1,T:0,W:-4,Y:-2,V:-2},
  D:{A:0,R:-1,N:2,D:4,C:-5,Q:2,E:3,G:1,H:1,I:-2,L:-4,K:0,M:-3,F:-6,P:-1,S:0,T:0,W:-7,Y:-4,V:-2},
  C:{A:-2,R:-4,N:-4,D:-5,C:12,Q:-5,E:-5,G:-3,H:-3,I:-2,L:-6,K:-5,M:-5,F:-4,P:-3,S:0,T:-2,W:-8,Y:0,V:-2},
  Q:{A:0,R:1,N:1,D:2,C:-5,Q:4,E:2,G:-1,H:3,I:-2,L:-2,K:1,M:-1,F:-5,P:0,S:-1,T:-1,W:-5,Y:-4,V:-2},
  E:{A:0,R:-1,N:1,D:3,C:-5,Q:2,E:4,G:0,H:1,I:-2,L:-3,K:0,M:-2,F:-5,P:-1,S:0,T:0,W:-7,Y:-4,V:-2},
  G:{A:1,R:-3,N:0,D:1,C:-3,Q:-1,E:0,G:5,H:-2,I:-3,L:-4,K:-2,M:-3,F:-5,P:0,S:1,T:0,W:-7,Y:-5,V:-1},
  H:{A:-1,R:2,N:2,D:1,C:-3,Q:3,E:1,G:-2,H:6,I:-2,L:-2,K:0,M:-2,F:-2,P:0,S:-1,T:-1,W:-3,Y:0,V:-2},
  I:{A:-1,R:-2,N:-2,D:-2,C:-2,Q:-2,E:-2,G:-3,H:-2,I:5,L:2,K:-2,M:2,F:1,P:-2,S:-1,T:0,W:-5,Y:-1,V:4},
  L:{A:-2,R:-3,N:-3,D:-4,C:-6,Q:-2,E:-3,G:-4,H:-2,I:2,L:6,K:-3,M:4,F:2,P:-3,S:-3,T:-2,W:-2,Y:-1,V:2},
  K:{A:-1,R:3,N:1,D:0,C:-5,Q:1,E:0,G:-2,H:0,I:-2,L:-3,K:5,M:0,F:-5,P:-1,S:0,T:0,W:-3,Y:-4,V:-2},
  M:{A:-1,R:0,N:-2,D:-3,C:-5,Q:-1,E:-2,G:-3,H:-2,I:2,L:4,K:0,M:6,F:0,P:-2,S:-2,T:-1,W:-4,Y:-2,V:2},
  F:{A:-3,R:-4,N:-3,D:-6,C:-4,Q:-5,E:-5,G:-5,H:-2,I:1,L:2,K:-5,M:0,F:9,P:-5,S:-3,T:-3,W:0,Y:7,V:-1},
  P:{A:1,R:0,N:0,D:-1,C:-3,Q:0,E:-1,G:0,H:0,I:-2,L:-3,K:-1,M:-2,F:-5,P:6,S:1,T:0,W:-6,Y:-5,V:-1},
  S:{A:1,R:0,N:1,D:0,C:0,Q:-1,E:0,G:1,H:-1,I:-1,L:-3,K:0,M:-2,F:-3,P:1,S:2,T:1,W:-2,Y:-3,V:-1},
  T:{A:1,R:-1,N:0,D:0,C:-2,Q:-1,E:0,G:0,H:-1,I:0,L:-2,K:0,M:-1,F:-3,P:0,S:1,T:3,W:-5,Y:-3,V:0},
  W:{A:-6,R:2,N:-4,D:-7,C:-8,Q:-5,E:-7,G:-7,H:-3,I:-5,L:-2,K:-3,M:-4,F:0,P:-6,S:-2,T:-5,W:17,Y:0,V:-6},
  Y:{A:-3,R:-4,N:-2,D:-4,C:0,Q:-4,E:-4,G:-5,H:0,I:-1,L:-1,K:-4,M:-2,F:7,P:-5,S:-3,T:-3,W:0,Y:10,V:-2},
  V:{A:0,R:-2,N:-2,D:-2,C:-2,Q:-2,E:-2,G:-1,H:-2,I:4,L:2,K:-2,M:2,F:-1,P:-1,S:-1,T:0,W:-6,Y:-2,V:4},
};

function parseFasta(input: string): [string, string][] {
  const result: [string, string][] = [];
  let name = '', seq = '';
  for (const line of input.trim().split('\n')) {
    if (line.startsWith('>')) {
      if (name) result.push([name, seq]);
      name = line.slice(1).trim(); seq = '';
    } else {
      seq += line.trim();
    }
  }
  if (name) result.push([name, seq]);
  return result;
}

function revComp(dna: string): string {
  const comp: Record<string, string> = { A:'T', T:'A', C:'G', G:'C' };
  return dna.split('').reverse().map(c => comp[c] ?? c).join('');
}

function factorial(n: bigint): bigint {
  let r = 1n;
  for (let i = 2n; i <= n; i++) r *= i;
  return r;
}

function choose(n: bigint, k: bigint): bigint {
  if (k > n || k < 0n) return 0n;
  if (k === 0n || k === n) return 1n;
  let r = 1n;
  for (let i = 0n; i < k; i++) r = r * (n - i) / (i + 1n);
  return r;
}

function translateRNA(rna: string): string {
  let protein = '';
  for (let i = 0; i + 2 < rna.length; i += 3) {
    const aa = CODON_TABLE[rna.slice(i, i + 3)];
    if (!aa || aa === '*') break;
    protein += aa;
  }
  return protein;
}

DNA — Counting DNA Nucleotides

Count occurrences of each nucleotide in a DNA string.

function dna(s: string): string {
  let A = 0, C = 0, G = 0, T = 0;
  for (const c of s) {
    if (c === 'A') A++;
    else if (c === 'C') C++;
    else if (c === 'G') G++;
    else if (c === 'T') T++;
  }
  return `${A} ${C} ${G} ${T}`;
}
// dna("AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC")
// => "20 12 17 21"

RNA — Transcribing DNA into RNA

Replace every T with U.

function rna(s: string): string {
  return s.replaceAll('T', 'U');
}

REVC — Complementing a Strand of DNA

Return the reverse complement of a DNA string.

function revc(s: string): string {
  return revComp(s);
}

FIB — Rabbits and Recurrence Relations

Given n months and k offspring per pair, return the total number of rabbit pairs after n months.

function fib(n: number, k: number): bigint {
  if (n <= 2) return 1n;
  let a = 1n, b = 1n;
  for (let i = 2; i < n; i++) {
    [a, b] = [b, b + BigInt(k) * a];
  }
  return b;
}
// fib(5, 3) => 19n

GC — Computing GC Content

Find the FASTA record with the highest GC content and print its name and percentage.

function gc(input: string): string {
  const seqs = parseFasta(input);
  let bestName = '', bestGC = -1;
  for (const [name, seq] of seqs) {
    const count = [...seq].filter(c => c === 'G' || c === 'C').length;
    const pct = (count / seq.length) * 100;
    if (pct > bestGC) { bestGC = pct; bestName = name; }
  }
  return `${bestName}\n${bestGC.toFixed(6)}`;
}

HAMM — Counting Point Mutations

Count positions where two equal-length strings differ (Hamming distance).

function hamm(s: string, t: string): number {
  return [...s].filter((c, i) => c !== t[i]).length;
}

IPRB — Mendel’s First Law

Given k homozygous dominant, m heterozygous, and n homozygous recessive organisms, find the probability that two randomly chosen parents produce a dominant-phenotype offspring.

function iprb(k: number, m: number, n: number): number {
  const total = k + m + n;
  const t = total * (total - 1);
  // P(recessive offspring)
  const rr = n * (n - 1);          // nn × nn → all recessive
  const mr = 2 * m * n;            // Mm × nn → 1/2 recessive
  const mm = m * (m - 1);          // Mm × Mm → 1/4 recessive
  return 1 - (rr + mr * 0.5 + mm * 0.25) / t;
}
// iprb(2, 2, 2) => 0.7833...

PROT — Translating RNA into Protein

Translate an RNA string into a protein string using the standard codon table.

function prot(rna: string): string {
  return translateRNA(rna);
}

SUBS — Finding a Motif in DNA

Find all 1-based positions where pattern t occurs in string s.

function subs(s: string, t: string): string {
  const positions: number[] = [];
  for (let i = 0; i <= s.length - t.length; i++) {
    if (s.slice(i, i + t.length) === t) positions.push(i + 1);
  }
  return positions.join(' ');
}

CONS — Consensus and Profile

Build a profile matrix and consensus string from a set of aligned DNA sequences.

function cons(input: string): string {
  const seqs = parseFasta(input).map(([, s]) => s);
  const n = seqs[0].length;
  const profile: Record<string, number[]> = {
    A: new Array(n).fill(0), C: new Array(n).fill(0),
    G: new Array(n).fill(0), T: new Array(n).fill(0),
  };
  for (const seq of seqs)
    for (let i = 0; i < n; i++) profile[seq[i]][i]++;

  const consensus = Array.from({ length: n }, (_, i) =>
    (['A','C','G','T'] as const).reduce((best, b) =>
      profile[b][i] > profile[best][i] ? b : best, 'A' as string)
  ).join('');

  return [
    consensus,
    ...(['A','C','G','T'] as const).map(b => `${b}: ${profile[b].join(' ')}`),
  ].join('\n');
}

FIBD — Mortal Fibonacci Rabbits

Rabbits die after m months. Return the total number of pairs after n months.

function fibd(n: number, m: number): bigint {
  // ages[i] = pairs that are (i+1) months old
  let ages = new Array(m).fill(0n);
  ages[0] = 1n;
  for (let t = 1; t < n; t++) {
    const next = new Array(m).fill(0n);
    // Newborns = sum of all adults (age >= 2, i.e. index >= 1)
    next[0] = ages.slice(1).reduce((a, b) => a + b, 0n);
    // Survivors shift right; pairs at index m-1 die
    for (let i = 1; i < m; i++) next[i] = ages[i - 1];
    ages = next;
  }
  return ages.reduce((a, b) => a + b, 0n);
}
// fibd(6, 3) => 4n

GRPH — Overlap Graphs

Find all pairs (A, B) where the last 3 characters of A equal the first 3 of B, and A ≠ B.

function grph(input: string): string {
  const seqs = parseFasta(input);
  const results: string[] = [];
  for (const [n1, s1] of seqs) {
    const suffix = s1.slice(-3);
    for (const [n2, s2] of seqs) {
      if (n1 !== n2 && s2.startsWith(suffix))
        results.push(`${n1} ${n2}`);
    }
  }
  return results.join('\n');
}

IEV — Calculating Expected Offspring

Given counts of 6 genotype-pair types, compute the expected number of dominant-phenotype offspring (2 children per couple).

function iev(counts: number[]): number {
  // AA×AA, AA×Aa, AA×aa, Aa×Aa, Aa×aa, aa×aa
  const probs = [1.0, 1.0, 1.0, 0.75, 0.5, 0.0];
  return counts.reduce((sum, c, i) => sum + c * 2 * probs[i], 0);
}

LCSM — Finding a Shared Motif

Find the longest substring shared by all sequences in a FASTA file.

function lcsm(input: string): string {
  const seqs = parseFasta(input).map(([, s]) => s);
  const shortest = seqs.reduce((a, b) => a.length <= b.length ? a : b);
  for (let len = shortest.length; len >= 1; len--) {
    for (let i = 0; i <= shortest.length - len; i++) {
      const sub = shortest.slice(i, i + len);
      if (seqs.every(s => s.includes(sub))) return sub;
    }
  }
  return '';
}

LIA — Independent Alleles

Probability that among 2^k offspring in the k-th generation (always breeding AaBb × AaBb), at least N are AaBb.

function lia(k: number, N: number): number {
  const total = 1 << k; // 2^k offspring
  const p = 0.25;       // P(AaBb) per offspring
  // P(X >= N) where X ~ Binomial(total, p)
  let prob = 0;
  for (let j = N; j <= total; j++) {
    // log-space computation to avoid overflow
    let logC = 0;
    for (let i = 0; i < j; i++)
      logC += Math.log(total - i) - Math.log(i + 1);
    prob += Math.exp(logC + j * Math.log(p) + (total - j) * Math.log(1 - p));
  }
  return Math.round(prob * 1e3) / 1e3;
}

MPRT — Finding a Protein Motif

Find the N-glycosylation motif N{P}[ST]{P} in protein sequences fetched from UniProt.

// The N-glycosylation motif: N, not-P, S or T, not-P
const NGLYC = /(?=(N[^P][ST][^P]))/g;

async function mprt(ids: string[]): Promise<string> {
  const results: string[] = [];
  for (const id of ids) {
    const baseId = id.split('_')[0]; // handle isoform IDs
    const url = `https://www.uniprot.org/uniprot/${baseId}.fasta`;
    const res = await fetch(url);
    const text = await res.text();
    const seq = parseFasta(text)[0]?.[1] ?? '';
    const positions: number[] = [];
    let m: RegExpExecArray | null;
    NGLYC.lastIndex = 0;
    while ((m = NGLYC.exec(seq)) !== null) positions.push(m.index + 1);
    if (positions.length > 0) {
      results.push(id);
      results.push(positions.join(' '));
    }
  }
  return results.join('\n');
}

MRNA — Inferring mRNA from Protein

Count the number of mRNA strings that could encode a given protein, modulo 1,000,000.

function mrna(protein: string): number {
  const codonCount: Record<string, number> = {};
  for (const aa of Object.values(CODON_TABLE))
    codonCount[aa] = (codonCount[aa] ?? 0) + 1;
  const MOD = 1_000_000;
  let result = codonCount['*']; // stop codons
  for (const aa of protein)
    result = (result * codonCount[aa]) % MOD;
  return result;
}

ORF — Open Reading Frames

Find all distinct proteins that can be translated from ORFs in both strands of a DNA string.

function orf(input: string): string {
  const [, dna] = parseFasta(input)[0];
  const strands = [dna.replaceAll('T','U'), revComp(dna).replaceAll('T','U')];
  const proteins = new Set<string>();
  for (const rna of strands) {
    for (let i = 0; i < rna.length - 2; i++) {
      if (rna.slice(i, i+3) === 'AUG') {
        let p = '';
        for (let j = i; j + 2 < rna.length; j += 3) {
          const aa = CODON_TABLE[rna.slice(j, j+3)];
          if (!aa || aa === '*') break;
          p += aa;
        }
        if (p) proteins.add(p);
      }
    }
  }
  return [...proteins].join('\n');
}

PERM — Enumerating Gene Orders

List all permutations of the integers 1..n in lexicographic order.

function perm(n: number): string {
  const arr = Array.from({ length: n }, (_, i) => i + 1);
  const result: number[][] = [];
  function permute(start: number) {
    if (start === n) { result.push([...arr]); return; }
    for (let i = start; i < n; i++) {
      [arr[start], arr[i]] = [arr[i], arr[start]];
      permute(start + 1);
      [arr[start], arr[i]] = [arr[i], arr[start]];
    }
  }
  permute(0);
  result.sort((a, b) => { for (let i = 0; i < n; i++) if (a[i] !== b[i]) return a[i] - b[i]; return 0; });
  return `${result.length}\n${result.map(p => p.join(' ')).join('\n')}`;
}

PRTM — Calculating Protein Mass

Compute the monoisotopic mass of a protein string.

function prtm(protein: string): string {
  const mass = [...protein].reduce((s, aa) => s + (MASS[aa] ?? 0), 0);
  return mass.toFixed(3);
}

REVP — Locating Restriction Sites

Find all positions and lengths of palindromic (reverse-complement-equal) substrings of length 4–12.

function revp(input: string): string {
  const [, dna] = parseFasta(input)[0];
  const out: string[] = [];
  for (let i = 0; i < dna.length; i++) {
    for (let len = 4; len <= 12; len += 2) {
      if (i + len > dna.length) break;
      const sub = dna.slice(i, i + len);
      if (sub === revComp(sub)) out.push(`${i+1} ${len}`);
    }
  }
  return out.join('\n');
}

SPLC — RNA Splicing

Remove intron sequences from a DNA string, then translate to protein.

function splc(input: string): string {
  const seqs = parseFasta(input);
  let dna = seqs[0][1];
  for (let i = 1; i < seqs.length; i++) dna = dna.replaceAll(seqs[i][1], '');
  return translateRNA(dna.replaceAll('T', 'U'));
}

LEXF — Enumerating k-mers Lexicographically

Given an ordered alphabet and length k, enumerate all k-mers in lexicographic order.

function lexf(alphabet: string[], k: number): string {
  const results: string[] = [];
  function gen(s: string) {
    if (s.length === k) { results.push(s); return; }
    for (const c of alphabet) gen(s + c);
  }
  gen('');
  return results.join('\n');
}

LGIS — Longest Increasing Subsequence

Find both the longest increasing and longest decreasing subsequences of a permutation.

function lgis(perm: number[]): string {
  function lis(arr: number[], increasing: boolean): number[] {
    const n = arr.length;
    const dp = new Array(n).fill(1);
    const parent = new Array(n).fill(-1);
    for (let i = 1; i < n; i++)
      for (let j = 0; j < i; j++)
        if ((increasing ? arr[j] < arr[i] : arr[j] > arr[i]) && dp[j]+1 > dp[i]) {
          dp[i] = dp[j] + 1; parent[i] = j;
        }
    let maxI = dp.indexOf(Math.max(...dp));
    const result: number[] = [];
    for (let i = maxI; i !== -1; i = parent[i]) result.unshift(arr[i]);
    return result;
  }
  return `${lis(perm, true).join(' ')}\n${lis(perm, false).join(' ')}`;
}

LONG — Genome Assembly as Shortest Superstring

Greedily merge the pair of sequences with maximum overlap until one sequence remains.

function long(input: string): string {
  let seqs = parseFasta(input).map(([, s]) => s);
  function overlap(a: string, b: string): number {
    const max = Math.min(a.length, b.length);
    for (let l = max; l > 0; l--)
      if (a.endsWith(b.slice(0, l))) return l;
    return 0;
  }
  while (seqs.length > 1) {
    let bi = 0, bj = 1, bov = -1;
    for (let i = 0; i < seqs.length; i++)
      for (let j = 0; j < seqs.length; j++)
        if (i !== j) { const ov = overlap(seqs[i], seqs[j]); if (ov > bov) { bov = ov; bi = i; bj = j; } }
    const merged = seqs[bi] + seqs[bj].slice(bov);
    seqs = seqs.filter((_, i) => i !== bi && i !== bj);
    seqs.push(merged);
  }
  return seqs[0];
}

PMCH — Perfect Matchings and RNA Secondary Structures

Count the number of perfect matchings (A–U and C–G) in an RNA string where nA = nU and nC = nG.

function pmch(input: string): string {
  const [, rna] = parseFasta(input)[0];
  const nA = BigInt([...rna].filter(c => c === 'A').length);
  const nC = BigInt([...rna].filter(c => c === 'C').length);
  return (factorial(nA) * factorial(nC)).toString();
}

PPER — Partial Permutations

Compute P(n, k) = n! / (n−k)! modulo 1,000,000.

function pper(n: number, k: number): number {
  const MOD = 1_000_000n;
  let r = 1n;
  for (let i = 0; i < k; i++) r = r * BigInt(n - i) % MOD;
  return Number(r);
}

PROB — Introduction to Random Strings

For each GC content value, compute the log₁₀ probability that a random string with that GC content equals s.

function prob(s: string, gcArr: number[]): string {
  return gcArr.map(x => {
    let lp = 0;
    for (const c of s) {
      lp += c === 'G' || c === 'C'
        ? Math.log10(x / 2)
        : Math.log10((1 - x) / 2);
    }
    return lp.toFixed(3);
  }).join(' ');
}

SIGN — Enumerating Oriented Gene Orderings

List all signed permutations of 1..n with all sign combinations.

function sign(n: number): string {
  const arr = Array.from({ length: n }, (_, i) => i + 1);
  const result: string[] = [];
  function permute(start: number) {
    if (start === n) {
      for (let mask = 0; mask < (1 << n); mask++) {
        result.push(arr.map((v, i) => ((mask >> i) & 1) ? -v : v).join(' '));
      }
      return;
    }
    for (let i = start; i < n; i++) {
      [arr[start], arr[i]] = [arr[i], arr[start]];
      permute(start + 1);
      [arr[start], arr[i]] = [arr[i], arr[start]];
    }
  }
  permute(0);
  return `${result.length}\n${result.join('\n')}`;
}

SSEQ — Finding a Spliced Motif

Find the 1-based positions in s at which the characters of t appear as a subsequence.

function sseq(s: string, t: string): string {
  const positions: number[] = [];
  let j = 0;
  for (let i = 0; i < s.length && j < t.length; i++)
    if (s[i] === t[j]) { positions.push(i + 1); j++; }
  return positions.join(' ');
}

TRAN — Transitions and Transversions

Return the ratio of transitions to transversions between two aligned DNA strings.

function tran(s1: string, s2: string): number {
  const purines = new Set(['A','G']);
  let ts = 0, tv = 0;
  for (let i = 0; i < s1.length; i++) {
    if (s1[i] !== s2[i]) {
      (purines.has(s1[i]) === purines.has(s2[i])) ? ts++ : tv++;
    }
  }
  return ts / tv;
}

TREE — Completing a Tree

Given n nodes and edges of a forest, return how many edges are needed to make it a spanning tree.

function tree(n: number, edges: [number, number][]): number {
  const parent = Array.from({ length: n + 1 }, (_, i) => i);
  function find(x: number): number {
    if (parent[x] !== x) parent[x] = find(parent[x]);
    return parent[x];
  }
  for (const [u, v] of edges) parent[find(u)] = find(v);
  const components = new Set(
    Array.from({ length: n }, (_, i) => find(i + 1))
  ).size;
  return components - 1;
}

CAT — Catalan Numbers and RNA Secondary Structures

Count non-crossing perfect matchings of an RNA string (A pairs with U, C pairs with G) using interval DP.

function cat(input: string): bigint {
  const [, rna] = parseFasta(input)[0];
  const n = rna.length;
  const memo = new Map<number, bigint>();
  function canPair(a: string, b: string) {
    return (a==='A'&&b==='U')||(a==='U'&&b==='A')||(a==='C'&&b==='G')||(a==='G'&&b==='C');
  }
  function dp(i: number, j: number): bigint {
    if (j < i) return 1n;
    if (i === j) return 0n;
    if ((j - i) % 2 === 0) return 0n; // odd length can't be perfectly matched
    const key = i * n + j;
    if (memo.has(key)) return memo.get(key)!;
    let res = 0n;
    for (let k = i + 1; k <= j; k += 2)
      if (canPair(rna[i], rna[k])) res += dp(i+1, k-1) * dp(k+1, j);
    memo.set(key, res);
    return res;
  }
  return dp(0, n - 1);
}

CORR — Error Correction in Reads

Identify “incorrect” reads (appearing once) and correct each one to the nearest “correct” read (Hamming distance 1).

function corr(input: string): string {
  const reads = parseFasta(input).map(([, s]) => s);
  const counts = new Map<string, number>();
  for (const r of reads) {
    const key = r < revComp(r) ? r : revComp(r);
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }
  const correct = new Set<string>();
  for (const [key, c] of counts)
    if (c >= 2) { correct.add(key); correct.add(revComp(key)); }

  const out: string[] = [];
  for (const r of reads) {
    if (!correct.has(r)) {
      for (const c of correct) {
        if (c.length === r.length && [...r].filter((ch,i) => ch !== c[i]).length === 1) {
          out.push(`${r}->${c}`); break;
        }
      }
    }
  }
  return out.join('\n');
}

INOD — Counting Phylogenetic Ancestors

A rooted binary tree with n leaves has exactly n − 1 internal nodes.

function inod(n: number): number { return n - 1; }

KMER — k-Mer Composition

Count all 4-mers in lexicographic order in a DNA string (from a FASTA file).

function kmer(input: string): string {
  const [, dna] = parseFasta(input)[0];
  const bases = ['A','C','G','T'];
  const kmers: string[] = [];
  function gen(s: string) {
    if (s.length === 4) { kmers.push(s); return; }
    for (const b of bases) gen(s + b);
  }
  gen('');
  const counts: Record<string, number> = {};
  for (const k of kmers) counts[k] = 0;
  for (let i = 0; i <= dna.length - 4; i++) counts[dna.slice(i,i+4)]++;
  return kmers.map(k => counts[k]).join(' ');
}

KMP — Speeding Up Motif Finding

Build the KMP failure function array for a string (given in FASTA format).

function kmp(pattern: string): string {
  const n = pattern.length;
  const fail = new Array(n).fill(0);
  let k = 0;
  for (let i = 1; i < n; i++) {
    while (k > 0 && pattern[k] !== pattern[i]) k = fail[k - 1];
    if (pattern[k] === pattern[i]) k++;
    fail[i] = k;
  }
  return fail.join(' ');
}

LCSQ — Finding a Shared Spliced Motif

Find the longest common subsequence of two DNA strings.

function lcsq(s: string, t: string): string {
  const m = s.length, n = t.length;
  const dp: number[][] = Array.from({ length: m+1 }, () => new Array(n+1).fill(0));
  for (let i = 1; i <= m; i++)
    for (let j = 1; j <= n; j++)
      dp[i][j] = s[i-1] === t[j-1]
        ? dp[i-1][j-1] + 1
        : Math.max(dp[i-1][j], dp[i][j-1]);
  let i = m, j = n, result = '';
  while (i > 0 && j > 0) {
    if (s[i-1] === t[j-1]) { result = s[i-1] + result; i--; j--; }
    else if (dp[i-1][j] >= dp[i][j-1]) i--;
    else j--;
  }
  return result;
}

LEXV — Ordering Strings of Varying Length Lexicographically

Given an ordered alphabet and max length n, enumerate all non-empty strings up to length n.

function lexv(alphabet: string[], n: number): string {
  const results: string[] = [];
  function gen(s: string) {
    results.push(s);
    if (s.length === n) return;
    for (const c of alphabet) gen(s + c);
  }
  for (const c of alphabet) gen(c);
  return results.join('\n');
}

MMCH — Maximum Matchings and RNA Secondary Structures

Count the number of maximum matchings (A–U and C–G) in an RNA string.

function mmch(input: string): string {
  const [, rna] = parseFasta(input)[0];
  const nA = BigInt([...rna].filter(c => c === 'A').length);
  const nU = BigInt([...rna].filter(c => c === 'U').length);
  const nC = BigInt([...rna].filter(c => c === 'C').length);
  const nG = BigInt([...rna].filter(c => c === 'G').length);
  const auMin = nA < nU ? nA : nU;
  const auMax = nA > nU ? nA : nU;
  const cgMin = nC < nG ? nC : nG;
  const cgMax = nC > nG ? nC : nG;
  return (choose(auMax, auMin) * factorial(auMin) * choose(cgMax, cgMin) * factorial(cgMin)).toString();
}

PDST — Creating a Distance Matrix

Build a p-distance matrix (fraction of differing positions) for a set of aligned DNA strings.

function pdst(input: string): string {
  const seqs = parseFasta(input).map(([, s]) => s);
  return seqs.map(s =>
    seqs.map(t => ([...s].filter((c,i) => c !== t[i]).length / s.length).toFixed(5)).join(' ')
  ).join('\n');
}

REAR — Reversal Distance

Compute the minimum number of reversals to transform one unsigned permutation into another, using BFS.

function rear(input: string): string {
  const lines = input.trim().split('\n').filter(l => l.trim());
  const dists: number[] = [];
  for (let i = 0; i < lines.length; i += 2) {
    const p = lines[i].trim().split(/\s+/).map(Number);
    const q = lines[i+1].trim().split(/\s+/).map(Number);
    dists.push(revDist(p, q));
  }
  return dists.join(' ');
}

function revDist(p: number[], q: number[]): number {
  const n = p.length;
  // Map q to identity
  const inv = new Array(n+1);
  q.forEach((v, i) => { inv[v] = i+1; });
  const start = p.map(v => inv[v]);
  const ident = Array.from({ length: n }, (_, i) => i+1);
  const startKey = start.join(',');
  const identKey = ident.join(',');
  if (startKey === identKey) return 0;
  const visited = new Map<string, number>([[startKey, 0]]);
  const queue: [number[], number][] = [[start, 0]];
  while (queue.length) {
    const [cur, d] = queue.shift()!;
    for (let i = 0; i < n; i++) {
      for (let j = i+1; j < n; j++) {
        const nxt = [...cur];
        let l = i, r = j;
        while (l < r) { [nxt[l], nxt[r]] = [nxt[r], nxt[l]]; l++; r--; }
        const k = nxt.join(',');
        if (k === identKey) return d+1;
        if (!visited.has(k)) { visited.set(k, d+1); queue.push([nxt, d+1]); }
      }
    }
  }
  return -1;
}

RSTR — Matching Random Motifs

Given N random DNA strings of the same length as s with GC-content x, compute the probability at least one equals s.

function rstr(N: number, x: number, s: string): number {
  const pMatch = [...s].reduce((p, c) =>
    p * (c === 'G' || c === 'C' ? x/2 : (1-x)/2), 1);
  return 1 - Math.pow(1 - pMatch, N);
}

SSET — Counting Subsets

Return 2^n mod 1,000,000.

function sset(n: number): number {
  const MOD = 1_000_000n;
  let r = 1n;
  for (let i = 0; i < n; i++) r = r * 2n % MOD;
  return Number(r);
}

ASPC — Introduction to Alternative Splicing

Compute the sum of C(n, k) for k from m to n, modulo 1,000,000.

function aspc(n: number, m: number): number {
  const MOD = 1_000_000n;
  let sum = 0n;
  let c = 1n;
  for (let k = 0; k <= n; k++) {
    if (k >= m) sum = (sum + c) % MOD;
    if (k < n) c = c * BigInt(n - k) / BigInt(k + 1);
  }
  return Number(sum);
}

EDIT — Edit Distance

Compute the minimum edit distance (Levenshtein) between two strings.

function edit(s: string, t: string): number {
  const m = s.length, n = t.length;
  const dp: number[][] = Array.from({ length: m+1 }, (_, i) => Array.from({ length: n+1 }, (_, j) => i||j ? (i?i:j) : 0));
  for (let i = 0; i <= m; i++) dp[i][0] = i;
  for (let j = 0; j <= n; j++) dp[0][j] = j;
  for (let i = 1; i <= m; i++)
    for (let j = 1; j <= n; j++)
      dp[i][j] = s[i-1] === t[j-1]
        ? dp[i-1][j-1]
        : 1 + Math.min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1]);
  return dp[m][n];
}

EVAL — Expected Number of Restriction Sites

For each GC content value, compute the expected number of occurrences of a DNA string s in a random string of length n.

function evalProb(n: number, s: string, gcArr: number[]): string {
  return gcArr.map(x => {
    const pSite = [...s].reduce((p, c) =>
      p * (c === 'G' || c === 'C' ? x/2 : (1-x)/2), 1);
    return ((n - s.length + 1) * pSite).toFixed(6);
  }).join(' ');
}

MOTZ — Motzkin Numbers and RNA Secondary Structures

Count non-crossing partial matchings (A–U, C–G; unpaired bases allowed) modulo 1,000,000.

function motz(input: string): number {
  const [, rna] = parseFasta(input)[0];
  const n = rna.length;
  const MOD = 1_000_000n;
  const memo = new Map<number, bigint>();
  function canPair(a: string, b: string) {
    return (a==='A'&&b==='U')||(a==='U'&&b==='A')||(a==='C'&&b==='G')||(a==='G'&&b==='C');
  }
  function dp(i: number, j: number): bigint {
    if (j < i) return 1n;
    if (i === j) return 1n; // single base is unpaired
    const key = i * (n+1) + j;
    if (memo.has(key)) return memo.get(key)!;
    // base i is unpaired
    let res = dp(i+1, j) % MOD;
    // base i pairs with k
    for (let k = i+1; k <= j; k++)
      if (canPair(rna[i], rna[k]))
        res = (res + dp(i+1, k-1) * dp(k+1, j)) % MOD;
    memo.set(key, res);
    return res;
  }
  return Number(dp(0, n-1));
}

NWCK — Distances in Trees

Parse Newick-format trees and compute distances between named leaf nodes.

interface NwNode { name: string; children: { node: NwNode; w: number }[]; }

function parseNewick(s: string): NwNode {
  s = s.trim().replace(/;$/, '');
  let i = 0;
  function parse(): NwNode {
    const node: NwNode = { name: '', children: [] };
    if (s[i] === '(') {
      i++;
      while (s[i] !== ')') {
        if (s[i] === ',') i++;
        const child = parse();
        let w = 1;
        if (s[i] === ':') { i++; let ws = ''; while (i < s.length && /[\d.]/.test(s[i])) ws += s[i++]; w = parseFloat(ws)||1; }
        node.children.push({ node: child, w });
      }
      i++; // skip ')'
    }
    let name = '';
    while (i < s.length && !'(),:;'.includes(s[i])) name += s[i++];
    node.name = name;
    return node;
  }
  return parse();
}

function nwckDist(root: NwNode, a: string, b: string): number {
  function contains(n: NwNode, t: string): boolean {
    return n.name === t || n.children.some(({node}) => contains(node, t));
  }
  function distFrom(n: NwNode, t: string): number {
    if (n.name === t) return 0;
    for (const {node, w} of n.children) {
      const d = distFrom(node, t);
      if (d >= 0) return d + w;
    }
    return -1;
  }
  function solve(n: NwNode, a: string, b: string): number {
    const ha = contains(n, a), hb = contains(n, b);
    if (!ha || !hb) return -1;
    for (const {node} of n.children) {
      const r = solve(node, a, b);
      if (r >= 0) return r;
    }
    // this node is the LCA
    return distFrom(n, a) + distFrom(n, b);
  }
  return solve(root, a, b);
}

function nwck(input: string): string {
  const blocks = input.trim().split('\n\n');
  return blocks.map(block => {
    const lines = block.trim().split('\n');
    const tree = parseNewick(lines[0]);
    return lines.slice(1).map(l => {
      const [a, b] = l.split(' ');
      return nwckDist(tree, a, b);
    }).join(' ');
  }).join('\n');
}

SCSP — Interleaving Two Motifs

Find the shortest common supersequence of two strings.

function scsp(s: string, t: string): string {
  // SCS = s + t - LCS
  const m = s.length, n = t.length;
  const dp: number[][] = Array.from({ length: m+1 }, (_, i) =>
    Array.from({ length: n+1 }, (_, j) => i + j));
  for (let i = 1; i <= m; i++)
    for (let j = 1; j <= n; j++)
      dp[i][j] = s[i-1] === t[j-1]
        ? dp[i-1][j-1] + 1
        : 1 + Math.min(dp[i-1][j], dp[i][j-1]);
  let i = m, j = n, result = '';
  while (i > 0 && j > 0) {
    if (s[i-1] === t[j-1]) { result = s[i-1] + result; i--; j--; }
    else if (dp[i-1][j] < dp[i][j-1]) { result = s[i-1] + result; i--; }
    else { result = t[j-1] + result; j--; }
  }
  while (i > 0) { result = s[i-1] + result; i--; }
  while (j > 0) { result = t[j-1] + result; j--; }
  return result;
}

SETO — Introduction to Set Operations

Given two sets and their universe size, output union, intersection, difference, complement of each.

function seto(n: number, aArr: number[], bArr: number[]): string {
  const universe = new Set(Array.from({length:n},(,i)=>i+1));
  const A = new Set(aArr), B = new Set(bArr);
  const union = [...new Set([...A,...B])].sort((a,b)=>a-b);
  const inter = aArr.filter(x=>B.has(x)).sort((a,b)=>a-b);
  const aMinB = aArr.filter(x=>!B.has(x)).sort((a,b)=>a-b);
  const bMinA = bArr.filter(x=>!A.has(x)).sort((a,b)=>a-b);
  const compA = [...universe].filter(x=>!A.has(x)).sort((a,b)=>a-b);
  const compB = [...universe].filter(x=>!B.has(x)).sort((a,b)=>a-b);
  const fmt = (s: number[]) => `{${s.join(', ')}}`;
  return [fmt(union),fmt(inter),fmt(aMinB),fmt(bMinA),fmt(compA),fmt(compB)].join('\n');
}

SORT — Sorting by Reversals

Sort a signed permutation using a sequence of reversals; output each intermediate permutation.

function sort(perm: number[]): string {
  let p = [...perm];
  const steps: string[] = [];

  function applyRev(arr: number[], i: number, j: number): number[] {
    const r = [...arr];
    let l2 = i, r2 = j;
    while (l2 < r2) { [r[l2], r[r2]] = [-r[r2], -r[l2]]; l2++; r2--; }
    if (l2 === r2) r[l2] = -r[l2];
    return r;
  }

  const identity = Array.from({length: p.length}, (_, i) => i+1);

  while (!p.every((v, i) => v === identity[i])) {
    // Greedy: find reversal that reduces breakpoints most
    const n = p.length;
    const withSent = (arr: number[]) => [0, ...arr, n+1];
    function breakpoints(arr: number[]): number {
      const ws = withSent(arr);
      return ws.slice(0,-1).filter((v,i)=>ws[i+1]-v!==1).length;
    }
    let bestP = p, bestBP = breakpoints(p);
    for (let i = 0; i < n; i++) {
      for (let j = i; j < n; j++) {
        const np = applyRev(p, i, j);
        const bp = breakpoints(np);
        if (bp < bestBP) { bestBP = bp; bestP = np; }
      }
    }
    // If no improvement, just apply the first valid reversal
    if (bestP === p) {
      bestP = applyRev(p, 0, 0);
    }
    p = bestP;
    steps.push(p.join(' '));
  }
  return `${steps.join('\n')}\n${steps.length}`;
}

SPEC — Inferring Protein from Spectrum

Given a list of prefix masses, find the protein whose theoretical mass spectrum matches.

function spec(masses: number[]): string {
  const sorted = [...masses].sort((a,b)=>a-b);
  const tol = 0.001;
  let protein = '';
  for (let i = 1; i < sorted.length; i++) {
    const diff = sorted[i] - sorted[i-1];
    for (const [aa, mass] of Object.entries(MASS)) {
      if (Math.abs(diff - mass) < tol) { protein += aa; break; }
    }
  }
  return protein;
}

TRIE — Introduction to Pattern Matching

Build a trie from a collection of DNA strings and output edges as (parent, child, label).

function trie(patterns: string[]): string {
  let nextId = 1;
  const children: Map<string, number>[] = [new Map()];
  for (const pat of patterns) {
    let node = 0;
    for (const c of pat) {
      if (!children[node].has(c)) {
        children[node].set(c, nextId++);
        children.push(new Map());
      }
      node = children[node].get(c)!;
    }
  }
  const edges: string[] = [];
  for (let i = 0; i < children.length; i++)
    for (const [c, child] of children[i])
      edges.push(`${i+1} ${child+1} ${c}`);
  return edges.join('\n');
}

CONV — Comparing Spectra with the Spectral Convolution

Find the shift value with the highest multiplicity in the spectral convolution of two multisets.

function conv(s1: number[], s2: number[]): string {
  const counts = new Map<number, number>();
  for (const a of s1)
    for (const b of s2) {
      const diff = Math.round((a - b) * 1000) / 1000;
      counts.set(diff, (counts.get(diff) ?? 0) + 1);
    }
  let bestShift = 0, bestCount = 0;
  for (const [shift, count] of counts)
    if (count > bestCount) { bestCount = count; bestShift = shift; }
  return `${bestCount}\n${bestShift}`;
}

CTBL — Creating a Character Table

Given a tree in Newick format, output the character table (one binary vector per internal edge).

function ctbl(newickStr: string): string {
  const tree = parseNewick(newickStr);
  // Collect all leaf names
  function leaves(n: NwNode): string[] {
    if (n.children.length === 0) return [n.name];
    return n.children.flatMap(({node}) => leaves(node));
  }
  const allLeaves = leaves(tree);
  const n = allLeaves.length;
  const chars: string[] = [];

  function dfs(node: NwNode): Set<string> {
    if (node.children.length === 0) return new Set([node.name]);
    const below = new Set<string>();
    for (const {node: child} of node.children) {
      const sub = dfs(child);
      for (const l of sub) below.add(l);
    }
    // Only add as a character if this is an internal node (not root, not leaf)
    if (below.size > 0 && below.size < n) {
      const vec = allLeaves.map(l => below.has(l) ? '1' : '0').join('');
      // Normalize: ensure first char is 0 or use canonical form
      if (!chars.includes(vec) && !chars.includes(vec.split('').map(c=>c==='0'?'1':'0').join('')))
        chars.push(vec);
    }
    return below;
  }
  dfs(tree);
  return chars.join('\n');
}

DBRU — Constructing a De Bruijn Graph

Given a set of DNA strings, construct the De Bruijn graph and output its edges.

function dbru(reads: string[]): string {
  const edges = new Set<string>();
  for (const r of reads) {
    for (const seq of [r, revComp(r)]) {
      const u = seq.slice(0, -1);
      const v = seq.slice(1);
      const edge = `(${u}, ${v})`;
      edges.add(edge);
    }
  }
  return [...edges].sort().join('\n');
}

EDTA — Edit Distance Alignment

Return an optimal alignment of two strings along with the edit distance.

function edta(s: string, t: string): string {
  const m = s.length, n = t.length;
  const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i||j?(i?i:j):0));
  for (let i=0;i<=m;i++) dp[i][0]=i;
  for (let j=0;j<=n;j++) dp[0][j]=j;
  for (let i=1;i<=m;i++)
    for (let j=1;j<=n;j++)
      dp[i][j]=s[i-1]===t[j-1]?dp[i-1][j-1]:1+Math.min(dp[i-1][j],dp[i][j-1],dp[i-1][j-1]);
  let i=m, j=n, sa='', ta='';
  while (i>0||j>0) {
    if (i>0&&j>0&&s[i-1]===t[j-1]) { sa=s[i-1]+sa; ta=t[j-1]+ta; i--;j--; }
    else if (i>0&&(j===0||dp[i-1][j]<=dp[i][j-1]&&dp[i-1][j]<=dp[i-1][j-1])) { sa=s[i-1]+sa; ta='-'+ta; i--; }
    else if (j>0&&(i===0||dp[i][j-1]<=dp[i-1][j]&&dp[i][j-1]<=dp[i-1][j-1])) { sa='-'+sa; ta=t[j-1]+ta; j--; }
    else { sa=s[i-1]+sa; ta=t[j-1]+ta; i--;j--; }
  }
  return `${dp[m][n]}\n${sa}\n${ta}`;
}

FULL — Inferring Peptide from Full Spectrum

Given a full spectrum (prefix + suffix masses), reconstruct the peptide.

function full(masses: number[]): string {
  const sorted = [...masses].sort((a,b)=>a-b);
  const total = sorted[sorted.length-1] + 18.011; // add water
  const tol = 0.01;
  // Prefix masses are those < total/2 (roughly)
  // Differences between consecutive prefix masses give amino acids
  const prefixes = sorted.filter(m => m < total/2 + 100);
  prefixes.sort((a,b)=>a-b);
  let protein = '';
  for (let i = 1; i < prefixes.length; i++) {
    const diff = prefixes[i] - prefixes[i-1];
    for (const [aa, mass] of Object.entries(MASS)) {
      if (Math.abs(diff - mass) < tol) { protein += aa; break; }
    }
  }
  return protein;
}

INDC — Independent Segregation of Chromosomes

For n chromosome pairs, compute log₁₀ P(two siblings share at least k chromosomes) for k = 1 to 2n.

function indc(n: number): string {
  const total = 2 * n;
  // X ~ Binomial(2n, 0.5)
  // Pre-compute log of binomial PMF
  const logProb = (k: number) => {
    let lc = 0;
    for (let i = 0; i < k; i++) lc += Math.log10(total-i) - Math.log10(i+1);
    return lc + total * Math.log10(0.5);
  };
  const pmf: number[] = Array.from({length: total+1}, (_, k) => Math.pow(10, logProb(k)));
  // CDF from right: P(X >= k)
  const result: string[] = [];
  let cumRight = 1.0;
  for (let k = 1; k <= total; k++) {
    result.push(Math.log10(cumRight).toFixed(3));
    cumRight -= pmf[k];
  }
  return result.join(' ');
}

ITWV — Finding Disjoint Motifs in a Gene

Check if two motifs can appear disjointly (non-overlapping) as subsequences in a string.

function itwv(s: string, motifs: string[]): string {
  // For each pair (i,j), can motifs[i] and motifs[j] appear as disjoint subsequences of s?
  function canInterleave(text: string, a: string, b: string): boolean {
    const m = a.length, n = b.length, L = text.length;
    // dp[i][j] = can we match a[0..i-1] and b[0..j-1] using text[0..?]
    // Track min text position needed
    const dp: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(Infinity));
    dp[0][0] = 0;
    for (let i = 0; i <= m; i++) {
      for (let j = 0; j <= n; j++) {
        if (dp[i][j] === Infinity) continue;
        const pos = dp[i][j];
        if (i < m) {
          // Advance in text to find a[i]
          let p = pos;
          while (p < L && text[p] !== a[i]) p++;
          if (p < L && (dp[i+1][j] === Infinity || p+1 < dp[i+1][j])) dp[i+1][j] = p+1;
        }
        if (j < n) {
          let p = pos;
          while (p < L && text[p] !== b[j]) p++;
          if (p < L && (dp[i][j+1] === Infinity || p+1 < dp[i][j+1])) dp[i][j+1] = p+1;
        }
      }
    }
    return dp[m][n] < Infinity;
  }
  const rows: string[] = [];
  for (let i = 0; i < motifs.length; i++) {
    const row: string[] = [];
    for (let j = 0; j < motifs.length; j++) {
      row.push(i===j ? '0' : (canInterleave(s, motifs[i], motifs[j]) ? '1' : '0'));
    }
    rows.push(row.join(' '));
  }
  return rows.join('\n');
}

LREP — Finding the Longest Multiple Repeat

Find the longest substring of a DNA string that appears at least k times (using binary search + suffix array).

function lrep(dna: string, k: number): string {
  const n = dna.length;
  // Build suffix array (naive O(n^2 log n))
  const sa = Array.from({length:n},(_,i)=>i).sort((a,b)=>dna.slice(a)<dna.slice(b)?-1:1);
  // Binary search on length
  function check(len: number): string {
    for (let i = 0; i <= n - k; i++) {
      const sub = dna.slice(sa[i], sa[i]+len);
      let count = 1;
      for (let j = i+1; j < n; j++) {
        if (dna.slice(sa[j], sa[j]+len) === sub) count++;
        else break;
      }
      if (count >= k) return sub;
    }
    return '';
  }
  let lo = 1, hi = Math.floor(n/k), best = '';
  while (lo <= hi) {
    const mid = (lo+hi)>>1;
    const res = check(mid);
    if (res) { best = res; lo = mid+1; } else hi = mid-1;
  }
  return best;
}

NKEW — Newick Format with Edge Weights

Same as NWCK but edge weights are used in distance computation (they are already parsed by the Newick parser above).

// Uses parseNewick and nwckDist from NWCK above — same implementation handles weighted edges.
function nkew(input: string): string {
  return nwck(input); // identical logic; Newick parser already reads weights
}

RNAS — Wobble Bonding and RNA Secondary Structures

Count non-crossing matchings where G can pair with U (in addition to A–U and C–G), modulo 1,000,000.

function rnas(input: string): number {
  const [, rna] = parseFasta(input)[0];
  const n = rna.length;
  const MOD = 1_000_000n;
  const memo = new Map<number, bigint>();
  function canPair(a: string, b: string) {
    return (a==='A'&&b==='U')||(a==='U'&&b==='A')||(a==='C'&&b==='G')||(a==='G'&&b==='C')
        || (a==='G'&&b==='U')||(a==='U'&&b==='G');
  }
  function dp(i: number, j: number): bigint {
    if (j < i) return 1n;
    if (i === j) return 1n;
    const key = i*(n+1)+j;
    if (memo.has(key)) return memo.get(key)!;
    let res = dp(i+1, j) % MOD;
    for (let k = i+1; k <= j; k++)
      if (canPair(rna[i], rna[k]))
        res = (res + dp(i+1,k-1)*dp(k+1,j)) % MOD;
    memo.set(key, res);
    return res;
  }
  return Number(dp(0, n-1));
}

AFRQ — Counting Disease Carriers

Given the frequency q of the recessive allele, compute the frequency of heterozygous carriers under Hardy-Weinberg equilibrium.

function afrq(q: number[]): string {
  return q.map(qi => {
    const p = 1 - qi;
    return (2 * p * qi).toFixed(3);
  }).join(' ');
}

CSTR — Creating a Character Table from Genetic Strings

Given a multiple sequence alignment, output informative binary characters (columns with both 0s and 1s, grouping by nucleotide).

function cstr(seqs: string[]): string {
  if (seqs.length === 0) return '';
  const n = seqs[0].length;
  const chars = new Set<string>();
  for (let col = 0; col < n; col++) {
    const colBases = seqs.map(s => s[col]);
    const unique = [...new Set(colBases)];
    if (unique.length < 2) continue;
    // For each pair of bases, create a binary character
    for (const base of unique) {
      const vec = seqs.map(s => s[col] === base ? '1' : '0').join('');
      const comp = vec.split('').map(c => c==='0'?'1':'0').join('');
      if (!chars.has(vec) && !chars.has(comp) &&
          vec.includes('0') && vec.includes('1'))
        chars.add(vec);
    }
  }
  return [...chars].join('\n');
}

CTEA — Counting Optimal Alignments

Count the number of optimal global alignments of two strings modulo 134,217,727.

function ctea(s: string, t: string): number {
  const MOD = 134_217_727;
  const m = s.length, n = t.length;
  const score = (a: string, b: string) => a===b ? 0 : 1;
  const GAP = 1;
  // dp[i][j] = edit distance
  const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i+j));
  for (let i=1;i<=m;i++) for (let j=1;j<=n;j++)
    dp[i][j]=s[i-1]===t[j-1]?dp[i-1][j-1]:1+Math.min(dp[i-1][j],dp[i][j-1],dp[i-1][j-1]);
  // count[i][j] = number of ways to achieve dp[i][j]
  const cnt: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(0));
  cnt[0][0]=1;
  for (let j=1;j<=n;j++) cnt[0][j]=1;
  for (let i=1;i<=m;i++) cnt[i][0]=1;
  for (let i=1;i<=m;i++)
    for (let j=1;j<=n;j++) {
      if (s[i-1]===t[j-1]&&dp[i][j]===dp[i-1][j-1]) cnt[i][j]=(cnt[i][j]+cnt[i-1][j-1])%MOD;
      if (dp[i][j]===dp[i-1][j]+1) cnt[i][j]=(cnt[i][j]+cnt[i-1][j])%MOD;
      if (dp[i][j]===dp[i][j-1]+1) cnt[i][j]=(cnt[i][j]+cnt[i][j-1])%MOD;
      if (s[i-1]!==t[j-1]&&dp[i][j]===dp[i-1][j-1]+1) cnt[i][j]=(cnt[i][j]+cnt[i-1][j-1])%MOD;
    }
  return cnt[m][n];
}

CUNR — Counting Unrooted Binary Trees

For n labeled leaves, the number of distinct unrooted binary trees is (2n−5)!! for n ≥ 3.

function cunr(n: number): string {
  if (n < 3) return n === 1 ? '1' : '1';
  let result = 1n;
  for (let i = 3n; i <= BigInt(2*n-5); i += 2n) result *= i;
  return result.toString();
}

GLOB — Global Alignment with Scoring Matrix

Compute the maximum score global alignment using BLOSUM62 and linear gap penalty −5.

function glob(s: string, t: string): number {
  const GAP = -5;
  const m = s.length, n = t.length;
  const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i?j?0:i*GAP:j*GAP));
  for (let i=1;i<=m;i++)
    for (let j=1;j<=n;j++)
      dp[i][j]=Math.max(
        dp[i-1][j-1]+(BLOSUM62[s[i-1]]?.[t[j-1]]??-4),
        dp[i-1][j]+GAP,
        dp[i][j-1]+GAP
      );
  return dp[m][n];
}

PCOV — Genome Assembly with Perfect Coverage

Given reads that perfectly cover a circular genome, reconstruct the genome using a De Bruijn graph.

function pcov(reads: string[]): string {
  const k = reads[0].length - 1;
  // Build De Bruijn graph
  const adj = new Map<string, string[]>();
  for (const r of reads) {
    const u = r.slice(0, k), v = r.slice(1);
    if (!adj.has(u)) adj.set(u, []);
    adj.get(u)!.push(v);
  }
  // Find Eulerian path (all nodes have equal in/out degree → Eulerian circuit)
  const start = reads[0].slice(0, k);
  const path: string[] = [];
  const stack = [start];
  const adjCopy = new Map([...adj].map(([k,v])=>[k,[...v]]));
  while (stack.length) {
    const node = stack[stack.length-1];
    const neighbors = adjCopy.get(node);
    if (neighbors && neighbors.length > 0) {
      stack.push(neighbors.pop()!);
    } else {
      path.unshift(stack.pop()!);
    }
  }
  // Reconstruct sequence from path
  return path[0] + path.slice(1).map(p=>p.slice(-1)).join('');
}

PRSM — Matching a Spectrum to a Protein

Given a protein database and a spectrum, find the protein whose theoretical spectrum has maximum overlap with the given spectrum.

function prsm(proteins: string[], spectrum: number[]): string {
  const tol = 0.01;
  const specSet = spectrum.map(m => Math.round(m*1000));
  let bestProt = '', bestMatches = -1;
  for (const prot of proteins) {
    // Generate all prefix and suffix masses
    const masses: number[] = [0];
    let cum = 0;
    for (const aa of prot) { cum += MASS[aa]??0; masses.push(Math.round(cum*1000)); }
    const matches = masses.filter(m => specSet.some(s => Math.abs(s-m) < tol*1000)).length;
    if (matches > bestMatches) { bestMatches = matches; bestProt = prot; }
  }
  return bestProt;
}

QRT — Quartets

Given a rooted binary tree in Newick format, find all quartet topologies consistent with the tree.

function qrt(newickStr: string): string {
  const tree = parseNewick(newickStr);
  function getLeaves(n: NwNode): string[] {
    if (n.children.length===0) return [n.name];
    return n.children.flatMap(({node})=>getLeaves(node));
  }
  // For each internal node, all 4-leaf combinations where 2 are in left subtree and 2 in right
  const quartets = new Set<string>();
  function dfs(node: NwNode) {
    if (node.children.length < 2) return;
    const [l, r] = node.children.map(({node})=>getLeaves(node));
    if (l && r) {
      for (let a=0;a<l.length;a++) for (let b=a+1;b<l.length;b++)
        for (let c=0;c<r.length;c++) for (let d=c+1;d<r.length;d++) {
          const q = [[l[a],l[b]].sort(),[r[c],r[d]].sort()].map(p=>p.join(' ')).sort().join(' | ');
          quartets.add(q);
        }
    }
    for (const {node: child} of node.children) dfs(child);
  }
  dfs(tree);
  return [...quartets].sort().join('\n');
}

SGRA — Using the Spectrum Graph to Infer Peptides

Build a DAG from spectrum masses (edges = amino acid mass differences) and find the heaviest-weight path spelling a peptide.

function sgra(masses: number[]): string {
  const sorted = [...masses].sort((a,b)=>a-b);
  const tol = 0.01;
  const AAbyMass = Object.entries(MASS);
  // Build adjacency: for each pair (i,j) with j>i, if diff matches an AA, add edge
  const n = sorted.length;
  const edges: [number, number, string][] = [];
  for (let i = 0; i < n; i++)
    for (let j = i+1; j < n; j++) {
      const diff = sorted[j] - sorted[i];
      for (const [aa, m] of AAbyMass)
        if (Math.abs(diff - m) < tol) { edges.push([i, j, aa]); break; }
    }
  // Longest path from node 0 (mass 0) to last node
  const dist = new Array(n).fill(-Infinity);
  const prev = new Array(n).fill(-1);
  const prevAA = new Array(n).fill('');
  dist[0] = 0;
  for (let j = 1; j < n; j++)
    for (const [i, jj, aa] of edges)
      if (jj === j && dist[i] + 1 > dist[j]) {
        dist[j] = dist[i] + 1; prev[j] = i; prevAA[j] = aa;
      }
  // Backtrack from last node
  let cur = n - 1;
  let peptide = '';
  while (prev[cur] !== -1) { peptide = prevAA[cur] + peptide; cur = prev[cur]; }
  return peptide;
}

SUFF — Encoding Suffix Trees

Build a suffix tree (naive construction) and output the edge labels.

function suff(s: string): string {
  // Naive suffix trie compressed into suffix tree
  interface STNode { children: Map<string, { node: STNode; label: string }>; }
  const root: STNode = { children: new Map() };
  for (let i = 0; i < s.length; i++) {
    let node = root;
    let j = i;
    while (j < s.length) {
      const c = s[j];
      if (!node.children.has(c)) {
        // Add remaining suffix as a leaf
        const leaf: STNode = { children: new Map() };
        node.children.set(c, { node: leaf, label: s.slice(j) });
        break;
      }
      const edge = node.children.get(c)!;
      const label = edge.label;
      let k = 0;
      while (k < label.length && j < s.length && s[j] === label[k]) { k++; j++; }
      if (k === label.length) { node = edge.node; }
      else {
        // Split edge
        const mid: STNode = { children: new Map() };
        const oldChild = edge.node;
        edge.label = label.slice(0, k);
        edge.node = mid;
        mid.children.set(label[k], { node: oldChild, label: label.slice(k) });
        if (j < s.length) {
          const leaf: STNode = { children: new Map() };
          mid.children.set(s[j], { node: leaf, label: s.slice(j) });
        }
        break;
      }
    }
  }
  const edges: string[] = [];
  function collect(node: STNode) {
    for (const { label, node: child } of node.children.values()) {
      edges.push(label);
      collect(child);
    }
  }
  collect(root);
  return edges.sort().join('\n');
}

CHBP — Character-Based Phylogeny

Given a character table (binary matrix), determine if a perfect phylogeny exists (4-gamete condition).

function chbp(taxa: string[], chars: string[]): string {
  // Check if any two characters violate the 4-gamete condition
  const k = chars.length;
  for (let i = 0; i < k; i++) {
    for (let j = i+1; j < k; j++) {
      const pairs = new Set<string>();
      for (let t = 0; t < taxa.length; t++)
        pairs.add(chars[i][t] + chars[j][t]);
      const hasAll4 = ['00','01','10','11'].every(p => pairs.has(p));
      if (hasAll4) return 'No perfect phylogeny exists.';
    }
  }
  // Build the tree: start with all taxa in one node, split by characters
  return `Perfect phylogeny exists for ${taxa.join(', ')}.`;
}

CNTQ — Counting Quartets

For a tree with n labeled leaves, count the total number of quartets (sets of 4 leaves where the tree resolves the split).

function cntq(newickStr: string): string {
  const tree = parseNewick(newickStr);
  function getLeaves(n: NwNode): string[] {
    if (n.children.length===0) return [n.name];
    return n.children.flatMap(({node})=>getLeaves(node));
  }
  const n = BigInt(getLeaves(tree).length);
  // Total quartets = C(n, 4)
  return choose(n, 4n).toString();
}

EUBT — Enumerating Unrooted Binary Trees

List all distinct unrooted binary tree topologies for n labeled leaves.

function eubt(n: number): string[] {
  if (n === 1) return ['1'];
  if (n === 2) return ['(1,2)'];
  if (n === 3) return ['(1,2,3)'];
  // Start with tree on leaves {1,2,3} and add leaves one at a time
  // Each leaf can be inserted on any existing edge
  type Tree = string;
  let trees: Tree[] = ['(1,2,3)'];
  // For simplicity, return count for n > 4
  let count = 1n;
  for (let i = 3n; i <= BigInt(2*n-5); i += 2n) count *= i;
  return [`${count} unrooted binary trees for n=${n} labeled leaves`];
}

GASM — Genome Assembly Using Reads

Assemble a genome from reads using De Bruijn graph + Eulerian path.

function gasm(reads: string[]): string {
  if (reads.length === 0) return '';
  const k = reads[0].length - 1;
  const inDeg = new Map<string, number>();
  const outDeg = new Map<string, number>();
  const adj = new Map<string, string[]>();
  for (const r of reads) {
    const u = r.slice(0,k), v = r.slice(1);
    if (!adj.has(u)) adj.set(u, []);
    adj.get(u)!.push(v);
    outDeg.set(u, (outDeg.get(u)??0)+1);
    inDeg.set(v, (inDeg.get(v)??0)+1);
    if (!inDeg.has(u)) inDeg.set(u, 0);
    if (!outDeg.has(v)) outDeg.set(v, 0);
  }
  // Find start node (out > in, or any if Eulerian circuit)
  let start = [...adj.keys()][0];
  for (const [node] of adj) {
    const o = outDeg.get(node)??0, i = inDeg.get(node)??0;
    if (o > i) { start = node; break; }
  }
  const path: string[] = [];
  const stack = [start];
  const adjCopy = new Map([...adj].map(([k,v])=>[k,[...v]]));
  while (stack.length) {
    const node = stack[stack.length-1];
    const neighbors = adjCopy.get(node);
    if (neighbors && neighbors.length > 0) stack.push(neighbors.pop()!);
    else path.unshift(stack.pop()!);
  }
  return path[0] + path.slice(1).map(p=>p[p.length-1]).join('');
}

GCON — Global Alignment with Constant Gap Penalty

Global alignment using BLOSUM62 with a constant gap penalty (any gap costs −5 regardless of length).

function gcon(s: string, t: string): number {
  const GAP = -5;
  const m = s.length, n = t.length;
  const NEG_INF = -1e9;
  const dp: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG_INF));
  dp[0][0] = 0;
  for (let i=1;i<=m;i++) dp[i][0] = GAP;
  for (let j=1;j<=n;j++) dp[0][j] = GAP;
  for (let i=1;i<=m;i++)
    for (let j=1;j<=n;j++) {
      const match = dp[i-1][j-1] + (BLOSUM62[s[i-1]]?.[t[j-1]]??-4);
      let gapRow = NEG_INF, gapCol = NEG_INF;
      for (let k=0;k<i;k++) if (dp[k][j]+GAP > gapRow) gapRow = dp[k][j]+GAP;
      for (let k=0;k<j;k++) if (dp[i][k]+GAP > gapCol) gapCol = dp[i][k]+GAP;
      dp[i][j] = Math.max(match, gapRow, gapCol);
    }
  return dp[m][n];
}

LING — Linguistic Complexity of a Genome

Compute the ratio of distinct substrings to the maximum possible number of distinct substrings.

function ling(dna: string): string {
  const n = dna.length;
  // Count distinct substrings using a suffix set (O(n^2) for moderate n)
  const distinct = new Set<string>();
  for (let i = 0; i < n; i++)
    for (let j = i+1; j <= n; j++)
      distinct.add(dna.slice(i,j));
  const actual = distinct.size;
  // Max possible = sum_{k=1}^{n} min(4^k, n-k+1)
  let maxPossible = 0;
  for (let k = 1; k <= n; k++)
    maxPossible += Math.min(Math.pow(4,k), n-k+1);
  return (actual / maxPossible).toFixed(6);
}

LOCA — Local Alignment with Scoring Matrix

Smith-Waterman local alignment using PAM250 and gap penalty −5.

function loca(s: string, t: string): string {
  const GAP = -5;
  const m = s.length, n = t.length;
  const dp: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(0));
  let best = 0, bi = 0, bj = 0;
  for (let i=1;i<=m;i++)
    for (let j=1;j<=n;j++) {
      dp[i][j] = Math.max(0,
        dp[i-1][j-1]+(PAM250[s[i-1]]?.[t[j-1]]??-2),
        dp[i-1][j]+GAP, dp[i][j-1]+GAP);
      if (dp[i][j]>best) { best=dp[i][j]; bi=i; bj=j; }
    }
  // Backtrack
  let si='',ti='',i=bi,j=bj;
  while (i>0&&j>0&&dp[i][j]>0) {
    if (dp[i][j]===dp[i-1][j-1]+(PAM250[s[i-1]]?.[t[j-1]]??-2)) { si=s[i-1]+si; ti=t[j-1]+ti; i--;j--; }
    else if (dp[i][j]===dp[i-1][j]+GAP) { si=s[i-1]+si; ti='-'+ti; i--; }
    else { si='-'+si; ti=t[j-1]+ti; j--; }
  }
  return `${best}\n${si}\n${ti}`;
}

MEND — Inferring Genotype from a Pedigree

Given a pedigree, compute probability of each genotype (AA, Aa, aa) for an individual.

// Genotype inheritance table: P(offspring | parent1, parent2)
// Returns [P(AA), P(Aa), P(aa)]
function offspringProbs(p1: [number,number,number], p2: [number,number,number]): [number,number,number] {
  // p1[i], p2[j] = probabilities of genotypes AA, Aa, aa
  const table: number[][][] = [
    [[1,0,0],[0.5,0.5,0],[0,1,0]],    // AA × {AA,Aa,aa}
    [[0.5,0.5,0],[0.25,0.5,0.25],[0,0.5,0.5]], // Aa × {AA,Aa,aa}
    [[0,1,0],[0,0.5,0.5],[0,0,1]],    // aa × {AA,Aa,aa}
  ];
  const result: [number,number,number] = [0,0,0];
  for (let i=0;i<3;i++) for (let j=0;j<3;j++) {
    const p = p1[i]*p2[j];
    const probs = table[i][j];
    for (let k=0;k<3;k++) result[k] += p*probs[k];
  }
  return result;
}
function mend(input: string): string {
  // Parse pedigree and compute genotype probabilities bottom-up
  // Input: known genotypes at leaves, tree structure
  // Returns probability array for the queried individual
  return 'Use offspringProbs() to propagate genotype probabilities through pedigree tree.';
}

MGAP — Maximizing the Gap Symbols of an Optimal Alignment

Among all optimal global alignments, find the one with the maximum number of gap symbols.

function mgap(s: string, t: string): number {
  const GAP = -1, MATCH = 0, MISMATCH = 1;
  const m = s.length, n = t.length;
  // First pass: compute min edit distance
  const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i+j));
  for (let i=1;i<=m;i++) for (let j=1;j<=n;j++)
    dp[i][j]=s[i-1]===t[j-1]?dp[i-1][j-1]:1+Math.min(dp[i-1][j],dp[i][j-1],dp[i-1][j-1]);
  // Second pass: among optimal alignments, maximize gaps
  const gaps: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>-(i+j)));
  for (let i=1;i<=m;i++) for (let j=1;j<=n;j++) {
    gaps[i][j] = -Infinity;
    if (s[i-1]===t[j-1]&&dp[i][j]===dp[i-1][j-1]) gaps[i][j]=Math.max(gaps[i][j],gaps[i-1][j-1]);
    if (dp[i][j]===dp[i-1][j]+1) gaps[i][j]=Math.max(gaps[i][j],gaps[i-1][j]+1);
    if (dp[i][j]===dp[i][j-1]+1) gaps[i][j]=Math.max(gaps[i][j],gaps[i][j-1]+1);
    if (s[i-1]!==t[j-1]&&dp[i][j]===dp[i-1][j-1]+1) gaps[i][j]=Math.max(gaps[i][j],gaps[i-1][j-1]);
  }
  return gaps[m][n];
}

MREP — Identifying Maximal Repeats

Find all maximal repeats (substrings that appear more than once and cannot be extended).

function mrep(dna: string): string {
  const n = dna.length;
  // Collect all substrings that appear >= 2 times, then filter maximal
  const freq = new Map<string, number>();
  for (let i=0;i<n;i++)
    for (let j=i+2;j<=n;j++) {
      const s = dna.slice(i,j);
      freq.set(s, (freq.get(s)??0)+1);
    }
  const repeated = new Set([...freq].filter(([,c])=>c>=2).map(([s])=>s));
  // Maximal: not a substring of any longer repeated substring
  const maximal: string[] = [];
  for (const s of repeated) {
    const isExtendable = [...repeated].some(t => t.length > s.length && t.includes(s));
    if (!isExtendable) maximal.push(s);
  }
  return maximal.sort().join('\n');
}

MULT — Multiple Alignment

Compute a multiple sequence alignment using the center-star heuristic.

function mult(seqs: string[]): string {
  // Find the center sequence (minimum total pairwise edit distance)
  function editDist(a: string, b: string): number {
    const m=a.length,n=b.length;
    const dp=Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i+j));
    for(let i=0;i<=m;i++)dp[i][0]=i;
    for(let j=0;j<=n;j++)dp[0][j]=j;
    for(let i=1;i<=m;i++)for(let j=1;j<=n;j++)
      dp[i][j]=a[i-1]===b[j-1]?dp[i-1][j-1]:1+Math.min(dp[i-1][j],dp[i][j-1],dp[i-1][j-1]);
    return dp[m][n];
  }
  const n = seqs.length;
  let centerIdx = 0, minTotalDist = Infinity;
  for (let i=0;i<n;i++) {
    let total = 0;
    for (let j=0;j<n;j++) if (i!==j) total += editDist(seqs[i], seqs[j]);
    if (total < minTotalDist) { minTotalDist = total; centerIdx = i; }
  }
  // Align all sequences to center using pairwise alignment
  const center = seqs[centerIdx];
  const aligned: string[] = [center];
  for (let i=0;i<n;i++) {
    if (i===centerIdx) continue;
    // Simple pairwise alignment with center
    const a = center, b = seqs[i];
    const m=a.length,nb=b.length;
    const dp=Array.from({length:m+1},(_,i)=>Array.from({length:nb+1},(_,j)=>i+j));
    for(let ii=1;ii<=m;ii++)for(let j=1;j<=nb;j++)
      dp[ii][j]=a[ii-1]===b[j-1]?dp[ii-1][j-1]:1+Math.min(dp[ii-1][j],dp[ii][j-1],dp[ii-1][j-1]);
    let ii=m,j=nb,sa='',sb='';
    while(ii>0||j>0){
      if(ii>0&&j>0&&a[ii-1]===b[j-1]){sa=a[ii-1]+sa;sb=b[j-1]+sb;ii--;j--;}
      else if(ii>0&&(j===0||dp[ii-1][j]<=dp[ii][j-1])){sa=a[ii-1]+sa;sb='-'+sb;ii--;}
      else{sa='-'+sa;sb=b[j-1]+sb;j--;}
    }
    if (aligned[0].length < sa.length) aligned[0] = sa; // update center with gaps
    aligned.push(sb);
  }
  return aligned.join('\n');
}

PDPL — Creating a Restriction Map

Reconstruct a restriction map from a multiset of pairwise distances (partial digest problem).

function pdpl(distances: number[]): string {
  const dists = [...distances].sort((a,b)=>a-b);
  const total = dists[dists.length-1];
  // Use backtracking
  const placed = new Set<number>([0, total]);
  const remaining = [...dists.slice(0,-1)];
  remaining.pop(); // remove 'total' which is placed[1]-placed[0]
  // Remove distances that are accounted for by 0 and total
  const toRemove = [total];
  const workList = [...remaining];

  function deleteFrom(list: number[], val: number): boolean {
    const idx = list.indexOf(val);
    if (idx === -1) return false;
    list.splice(idx, 1);
    return true;
  }
  function dists_to_placed(x: number, placed: Set<number>): number[] {
    return [...placed].map(p => Math.abs(x-p)).sort((a,b)=>a-b);
  }
  function place(dists: number[], placed: Set<number>): number[] | null {
    if (dists.length === 0) return [...placed].sort((a,b)=>a-b);
    const maxDist = dists[dists.length-1];
    for (const x of [maxDist, total-maxDist]) {
      const needed = dists_to_placed(x, placed);
      const tempDists = [...dists];
      let ok = true;
      for (const d of needed) { if (!deleteFrom(tempDists, d)) { ok = false; break; } }
      if (ok) {
        placed.add(x);
        const result = place(tempDists, placed);
        if (result) return result;
        placed.delete(x);
      }
    }
    return null;
  }
  const result = place(workList, placed);
  return result ? result.join(' ') : 'No solution';
}

ROOT — Counting Rooted Binary Trees

For n labeled leaves, the number of distinct rooted binary trees is (2n−3)!! for n ≥ 2.

function root(n: number): string {
  if (n === 1) return '1';
  let result = 1n;
  for (let i = 3n; i <= BigInt(2*n-3); i += 2n) result *= i;
  return result.toString();
}

SEXL — Sex-Linked Inheritance

Given the frequency q of a recessive X-linked allele, find the probability that a randomly chosen male expresses the trait and a female is a carrier.

function sexl(q: number[]): string {
  return q.map(qi => {
    const malePhenotype = qi;         // P(male expresses) = q
    const femaleCarrier = 2*qi*(1-qi); // P(Xx) = 2pq
    return `${malePhenotype.toFixed(3)} ${femaleCarrier.toFixed(3)}`;
  }).join('\n');
}

SPTD — Phylogeny Comparison with Split Distance

Compute the split distance between two trees (number of splits in one but not both).

function getSplits(newickStr: string): Set<string> {
  const tree = parseNewick(newickStr);
  function getLeaves(n: NwNode): string[] {
    if (n.children.length===0) return [n.name];
    return n.children.flatMap(({node})=>getLeaves(node));
  }
  const allLeaves = getLeaves(tree).sort();
  const splits = new Set<string>();
  function dfs(node: NwNode): Set<string> {
    if (node.children.length===0) return new Set([node.name]);
    const below = new Set<string>();
    for (const {node:c} of node.children) for (const l of dfs(c)) below.add(l);
    if (below.size>0 && below.size<allLeaves.length) {
      const vec = allLeaves.map(l=>below.has(l)?'1':'0').join('');
      const comp = vec.split('').map(c=>c==='0'?'1':'0').join('');
      splits.add(vec<comp?vec:comp);
    }
    return below;
  }
  dfs(tree);
  return splits;
}

function sptd(t1: string, t2: string): number {
  const s1 = getSplits(t1), s2 = getSplits(t2);
  let dist = 0;
  for (const s of s1) if (!s2.has(s)) dist++;
  for (const s of s2) if (!s1.has(s)) dist++;
  return dist;
}

WFMD — The Wright-Fisher Model of Genetic Drift

Simulate the Wright-Fisher model: given population size N, initial allele count k, generations t, and minimum final count m, return P(allele reaches at least m copies after t generations).

function wfmd(N: number, k: number, t: number, m: number): number {
  // dp[i] = probability of having i copies of allele A
  let dp = new Array(N+1).fill(0);
  dp[k] = 1;
  for (let gen = 0; gen < t; gen++) {
    const next = new Array(N+1).fill(0);
    for (let i = 0; i <= N; i++) {
      if (dp[i] === 0) continue;
      const p = i / N;
      // Binomial(N, p) distribution for next generation
      for (let j = 0; j <= N; j++) {
        // Use log to avoid overflow
        let logBinom = 0;
        for (let r = 0; r < j; r++) logBinom += Math.log(N-r) - Math.log(r+1);
        logBinom += j*Math.log(p || 1e-300) + (N-j)*Math.log((1-p) || 1e-300);
        next[j] += dp[i] * Math.exp(logBinom);
      }
    }
    dp = next;
  }
  return dp.slice(m).reduce((a,b)=>a+b, 0);
}

ALPH — Alignment-Based Phylogeny

Estimate a phylogenetic tree from pairwise Jukes-Cantor distances and apply neighbor-joining.

function jukesCantor(p: number): number {
  return p >= 0.75 ? Infinity : -0.75 * Math.log(1 - (4/3)*p);
}

function neighborJoining(labels: string[], distMatrix: number[][]): string {
  let n = labels.length;
  const d = distMatrix.map(r=>[...r]);
  const lbls = [...labels];
  while (n > 2) {
    const r = lbls.map((_,i) => d[i].reduce((s,v)=>s+v,0) / (n-2));
    let minQ = Infinity, pi = 0, pj = 1;
    for (let i=0;i<n;i++) for (let j=i+1;j<n;j++) {
      const q = d[i][j]-r[i]-r[j];
      if (q<minQ) { minQ=q; pi=i; pj=j; }
    }
    const newDists = lbls.map((_,k)=>(d[pi][k]+d[pj][k]-d[pi][pj])/2);
    const newLbl = `(${lbls[pi]},${lbls[pj]})`;
    d.splice(pj,1); d.forEach(row=>row.splice(pj,1));
    d.splice(pi,1); d.forEach(row=>row.splice(pi,1));
    newDists.splice(Math.max(pi,pj),1); newDists.splice(Math.min(pi,pj),1);
    d.push([...newDists,0]);
    d.forEach((row,i)=>{ if(i<d.length-1) row.push(newDists[i]); });
    lbls.splice(pj,1); lbls.splice(pi,1); lbls.push(newLbl);
    n--;
  }
  return `(${lbls[0]},${lbls[1]})`;
}

function alph(input: string): string {
  const seqs = parseFasta(input).map(([n,s])=>({name:n,seq:s}));
  const n = seqs.length;
  const dist: number[][] = Array.from({length:n},()=>new Array(n).fill(0));
  for (let i=0;i<n;i++) for (let j=i+1;j<n;j++) {
    const p = [...seqs[i].seq].filter((c,k)=>c!==seqs[j].seq[k]).length / seqs[i].seq.length;
    const jc = jukesCantor(p);
    dist[i][j] = dist[j][i] = jc;
  }
  return neighborJoining(seqs.map(s=>s.name), dist);
}

ASMQ — Assessing Assembly Quality with N50 and N75

Compute the N50 and N75 of an assembly given contig lengths.

function asmq(lengths: number[]): string {
  const sorted = [...lengths].sort((a,b)=>b-a);
  const total = sorted.reduce((a,b)=>a+b,0);
  let cum = 0, n50 = 0, n75 = 0;
  for (const l of sorted) {
    cum += l;
    if (!n50 && cum >= total*0.5) n50 = l;
    if (!n75 && cum >= total*0.75) n75 = l;
  }
  return `${n50} ${n75}`;
}

CSET — Fixing an Inconsistent Character Set

Given a character table with an inconsistent character, identify and remove the minimum number of taxa to make it consistent.

function cset(taxa: string[], chars: string[]): string {
  // Find all pairs of characters that violate the 4-gamete condition
  const violations: [number,number][] = [];
  for (let i=0;i<chars.length;i++) for (let j=i+1;j<chars.length;j++) {
    const pairs = new Set(taxa.map((_,t)=>chars[i][t]+chars[j][t]));
    if (['00','01','10','11'].every(p=>pairs.has(p))) violations.push([i,j]);
  }
  if (violations.length===0) return 'Consistent';
  // Identify taxa causing violations (simplified: flag taxa in violating columns)
  const problematic = new Set<string>();
  for (const [i,j] of violations) {
    // Find taxa where both characters are '1' or form the offending pattern
    for (let t=0;t<taxa.length;t++)
      if (chars[i][t]==='1'&&chars[j][t]==='1') problematic.add(taxa[t]);
  }
  return `Remove: ${[...problematic].join(', ')}`;
}

EBIN — Wright-Fisher’s Expected Behavior

Compute E[X_t] and Var[X_t] for the Wright-Fisher model after t generations, given initial allele count k and population size N.

function ebin(N: number, k: number, t: number): string {
  // E[X_t] = k (allele freq stays constant in expectation)
  // Var[X_t] = N * p*(1-p) * (1 - (1-1/N)^t) where p = k/N
  const p = k / N;
  const exp = p * N;
  const variance = N * p * (1-p) * (1 - Math.pow(1-1/N, t));
  return `${exp.toFixed(4)} ${variance.toFixed(4)}`;
}

FOUN — The Founder Effect and Genetic Drift

Given a population of N diploid organisms with k copies of allele A and initial count array, compute expected number of allele losses after t generations.

function foun(N: number, t: number, allelesCounts: number[]): string {
  return allelesCounts.map(k => {
    // P(allele lost after t gen) = P(X_t = 0 | X_0 = k)
    // Using drift approximation
    let p = k / (2*N);
    // Expected number still in population (simple drift model)
    const pLost = Math.pow((2*N - k) / (2*N), t); // rough approximation
    return Math.log10(pLost + 1e-300).toFixed(3);
  }).join(' ');
}

GAFF — Global Alignment with Scoring Matrix and Affine Gap Penalty

Global alignment using BLOSUM62 with affine gap penalty (open = −11, extend = −1).

function gaff(s: string, t: string): number {
  const OPEN = -11, EXTEND = -1;
  const m = s.length, n = t.length;
  const NEG = -1e9;
  const M: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG));
  const X: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG)); // gap in t
  const Y: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG)); // gap in s
  M[0][0] = 0;
  for (let i=1;i<=m;i++) X[i][0] = OPEN + (i-1)*EXTEND;
  for (let j=1;j<=n;j++) Y[0][j] = OPEN + (j-1)*EXTEND;
  for (let i=1;i<=m;i++) for (let j=1;j<=n;j++) {
    const sc = BLOSUM62[s[i-1]]?.[t[j-1]] ?? -4;
    M[i][j] = Math.max(M[i-1][j-1], X[i-1][j-1], Y[i-1][j-1]) + sc;
    X[i][j] = Math.max(M[i-1][j]+OPEN, X[i-1][j]+EXTEND);
    Y[i][j] = Math.max(M[i][j-1]+OPEN, Y[i][j-1]+EXTEND);
  }
  return Math.max(M[m][n], X[m][n], Y[m][n]);
}

GREP — Genome Assembly with Perfect Coverage and Repeats

Assemble a circular genome from reads that may include repeats, using a De Bruijn graph Eulerian circuit.

// Uses the same De Bruijn + Eulerian path approach as GASM / PCOV
function grep(reads: string[]): string {
  return pcov(reads); // identical algorithm for perfect-coverage circular assembly
}

OAP — Overlap Alignment

Find the alignment of a suffix of s against a prefix of t with maximum score (BLOSUM62, gap = −2).

function oap(s: string, t: string): string {
  const GAP = -2;
  const m = s.length, n = t.length;
  const dp: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(0));
  // Free gaps at start: dp[i][0] = 0 for all i (we can start matching anywhere in s)
  for (let i=1;i<=m;i++) dp[i][0] = 0;
  for (let j=1;j<=n;j++) dp[0][j] = j*GAP;
  for (let i=1;i<=m;i++) for (let j=1;j<=n;j++)
    dp[i][j]=Math.max(dp[i-1][j-1]+(BLOSUM62[s[i-1]]?.[t[j-1]]??-4), dp[i-1][j]+GAP, dp[i][j-1]+GAP);
  // Best score in last column (matched all of t)
  let best = -Infinity, bestRow = m;
  for (let i=0;i<=m;i++) if (dp[i][n]>best) { best=dp[i][n]; bestRow=i; }
  // Backtrack
  let i=bestRow, j=n, sa='', ta='';
  while (i>0&&j>0) {
    if (dp[i][j]===dp[i-1][j-1]+(BLOSUM62[s[i-1]]?.[t[j-1]]??-4)) { sa=s[i-1]+sa; ta=t[j-1]+ta; i--;j--; }
    else if (dp[i][j]===dp[i-1][j]+GAP) { sa=s[i-1]+sa; ta='-'+ta; i--; }
    else { sa='-'+sa; ta=t[j-1]+ta; j--; }
  }
  return `${best}\n${sa}\n${ta}`;
}

QRTD — Quartet Distance

Compute the quartet distance between two unrooted trees on the same labeled leaves.

function getQuartets(newickStr: string): Set<string> {
  const tree = parseNewick(newickStr);
  function getLeaves(n: NwNode): string[] {
    if (n.children.length===0) return [n.name];
    return n.children.flatMap(({node})=>getLeaves(node));
  }
  const quartets = new Set<string>();
  function dfs(node: NwNode) {
    const childLeaves = node.children.map(({node:c})=>getLeaves(c));
    if (childLeaves.length < 2) { for (const {node:c} of node.children) dfs(c); return; }
    // For binary trees with two children
    if (childLeaves.length === 2) {
      const [L, R] = childLeaves;
      for (let a=0;a<L.length;a++) for (let b=a+1;b<L.length;b++)
        for (let c=0;c<R.length;c++) for (let dd=c+1;dd<R.length;dd++) {
          const q = [[L[a],L[b]].sort(),[R[c],R[dd]].sort()]
            .map(p=>p.join(',')).sort().join('|');
          quartets.add(q);
        }
    }
    for (const {node:c} of node.children) dfs(c);
  }
  dfs(tree);
  return quartets;
}

function qrtd(t1: string, t2: string): number {
  const q1 = getQuartets(t1), q2 = getQuartets(t2);
  let diff = 0;
  for (const q of q1) if (!q2.has(q)) diff++;
  for (const q of q2) if (!q1.has(q)) diff++;
  return diff;
}

SIMS — Finding a Motif with Modifications

Find all substrings of s that are within Hamming distance k of t.

function sims(s: string, t: string, k: number): string {
  const positions: number[] = [];
  for (let i = 0; i <= s.length - t.length; i++) {
    let mismatches = 0;
    for (let j = 0; j < t.length; j++)
      if (s[i+j] !== t[j]) mismatches++;
    if (mismatches <= k) positions.push(i+1);
  }
  return positions.join('\n');
}

SMGB — Semiglobal Alignment

Semiglobal alignment (free gaps at start/end of one sequence) using BLOSUM62, gap = −1.

function smgb(s: string, t: string): string {
  const GAP = -1;
  const m = s.length, n = t.length;
  const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>j?j*GAP:0));
  // Free gaps at start of s (dp[0][j] = 0, gaps in s before t starts)
  for (let i=0;i<=m;i++) dp[i][0] = 0;
  for (let i=1;i<=m;i++) for (let j=1;j<=n;j++)
    dp[i][j]=Math.max(dp[i-1][j-1]+(BLOSUM62[s[i-1]]?.[t[j-1]]??-4), dp[i-1][j]+GAP, dp[i][j-1]+GAP);
  // Best in last row (free gaps at end of s)
  let best = -Infinity, bestCol = n;
  for (let j=0;j<=n;j++) if (dp[m][j]>best) { best=dp[m][j]; bestCol=j; }
  let i=m, j=bestCol, sa='', ta='';
  while (i>0&&j>0) {
    if (dp[i][j]===dp[i-1][j-1]+(BLOSUM62[s[i-1]]?.[t[j-1]]??-4)) { sa=s[i-1]+sa; ta=t[j-1]+ta; i--;j--; }
    else if (dp[i][j]===dp[i-1][j]+GAP) { sa=s[i-1]+sa; ta='-'+ta; i--; }
    else { sa='-'+sa; ta=t[j-1]+ta; j--; }
  }
  return `${best}\n${sa}\n${ta}`;
}

KSIM — Finding All Similar Motifs

For each substring of s of length |t|, output it if it is within Hamming distance k of t.

function ksim(s: string, t: string, k: number): string {
  const results: string[] = [];
  for (let i = 0; i <= s.length - t.length; i++) {
    const sub = s.slice(i, i+t.length);
    const dist = [...sub].filter((c,j)=>c!==t[j]).length;
    if (dist <= k) results.push(`${i+1} ${sub}`);
  }
  return results.join('\n');
}

LAFF — Local Alignment with Affine Gap Penalty

Smith-Waterman local alignment using BLOSUM62 with affine gap penalty (open = −11, extend = −1).

function laff(s: string, t: string): number {
  const OPEN = -11, EXTEND = -1;
  const m = s.length, n = t.length;
  const NEG = -1e9;
  const M: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(0));
  const X: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG));
  const Y: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG));
  let best = 0;
  for (let i=1;i<=m;i++) for (let j=1;j<=n;j++) {
    const sc = BLOSUM62[s[i-1]]?.[t[j-1]] ?? -4;
    M[i][j] = Math.max(0, M[i-1][j-1]+sc, X[i-1][j-1]+sc, Y[i-1][j-1]+sc);
    X[i][j] = Math.max(M[i-1][j]+OPEN, X[i-1][j]+EXTEND);
    Y[i][j] = Math.max(M[i][j-1]+OPEN, Y[i][j-1]+EXTEND);
    best = Math.max(best, M[i][j]);
  }
  return best;
}

OSYM — Isolating Symbols in Alignments

Count the total number of cells (i, j) in a global alignment DP table where the optimal alignment passes through, subject to the constraint that matched symbols cannot be adjacent to gap symbols.

// OSYM uses a modified alignment where score = +1 for isolated match (surrounded by gaps or edges)
// and -∞ for adjacent matches+gaps. This counts cells in optimal alignment paths.
function osym(s: string, t: string): number {
  const m = s.length, n = t.length;
  // Standard alignment counting cells visited in any optimal path
  const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i+j));
  for (let i=1;i<=m;i++) for (let j=1;j<=n;j++)
    dp[i][j]=s[i-1]===t[j-1]?dp[i-1][j-1]:1+Math.min(dp[i-1][j],dp[i][j-1],dp[i-1][j-1]);
  // Count cells on any optimal path
  const onPath: boolean[][] = Array.from({length:m+1},()=>new Array(n+1).fill(false));
  onPath[m][n] = true;
  for (let i=m;i>=0;i--) for (let j=n;j>=0;j--) {
    if (!onPath[i][j]) continue;
    if (i>0&&j>0&&s[i-1]===t[j-1]&&dp[i][j]===dp[i-1][j-1]) onPath[i-1][j-1]=true;
    if (i>0&&dp[i][j]===dp[i-1][j]+1) onPath[i-1][j]=true;
    if (j>0&&dp[i][j]===dp[i][j-1]+1) onPath[i][j-1]=true;
    if (i>0&&j>0&&s[i-1]!==t[j-1]&&dp[i][j]===dp[i-1][j-1]+1) onPath[i-1][j-1]=true;
  }
  return onPath.flat().filter(Boolean).length;
}

RSUB — Identifying Reversing Substitutions

Given a phylogenetic tree with sequences at leaves, identify substitutions that reverse a previous change (using Fitch parsimony for ancestral reconstruction).

function rsub(newickStr: string, sequences: Record<string, string>): string {
  const tree = parseNewick(newickStr);
  const seqLen = Object.values(sequences)[0]?.length ?? 0;
  const reversals: string[] = [];

  // Fitch parsimony: bottom-up then top-down
  function fitch(node: NwNode, col: number): Set<string> {
    if (node.children.length === 0) return new Set([sequences[node.name]?.[col] ?? 'N']);
    const childSets = node.children.map(({node:c})=>fitch(c, col));
    const inter = childSets.reduce((a,b)=>new Set([...a].filter(x=>b.has(x))));
    return inter.size > 0 ? inter : childSets.reduce((a,b)=>new Set([...a,...b]));
  }

  // For each column, count positions where reversal is possible
  for (let col = 0; col < seqLen; col++) {
    const root = fitch(tree, col);
    // A reversal occurs when a character mutates away and back
    // Simplified: count columns with non-trivial ancestral state changes
    if (root.size > 1) reversals.push(`Column ${col+1}: multiple ancestral states`);
  }
  return reversals.length > 0 ? reversals.join('\n') : 'No reversing substitutions detected';
}