TypeScript solutions to all 105 problems from the Rosalind bioinformatics problem set.
const CODON_TABLE: Record<string, string> = {
UUU:'F',UUC:'F',UUA:'L',UUG:'L',CUU:'L',CUC:'L',CUA:'L',CUG:'L',
AUU:'I',AUC:'I',AUA:'I',AUG:'M',GUU:'V',GUC:'V',GUA:'V',GUG:'V',
UCU:'S',UCC:'S',UCA:'S',UCG:'S',CCU:'P',CCC:'P',CCA:'P',CCG:'P',
ACU:'T',ACC:'T',ACA:'T',ACG:'T',GCU:'A',GCC:'A',GCA:'A',GCG:'A',
UAU:'Y',UAC:'Y',UAA:'*',UAG:'*',CAU:'H',CAC:'H',CAA:'Q',CAG:'Q',
AAU:'N',AAC:'N',AAA:'K',AAG:'K',GAU:'D',GAC:'D',GAA:'E',GAG:'E',
UGU:'C',UGC:'C',UGA:'*',UGG:'W',CGU:'R',CGC:'R',CGA:'R',CGG:'R',
AGU:'S',AGC:'S',AGA:'R',AGG:'R',GGU:'G',GGC:'G',GGA:'G',GGG:'G',
};
const MASS: Record<string, number> = {
A:71.03711,C:103.00919,D:115.02694,E:129.04259,F:147.06841,
G:57.02146, H:137.05891,I:113.08406,K:128.09496,L:113.08406,
M:131.04049,N:114.04293,P:97.05276, Q:128.05858,R:156.10111,
S:87.03203, T:101.04768,V:99.06841, W:186.07931,Y:163.06333,
};
// Full BLOSUM62 matrix (used by GLOB, GAFF, GCON, OAP, SMGB)
const BLOSUM62: Record<string, Record<string, number>> = {
A:{A:4,R:-1,N:-2,D:-2,C:0,Q:-1,E:-1,G:0,H:-2,I:-1,L:-1,K:-1,M:-1,F:-2,P:-1,S:1,T:0,W:-3,Y:-2,V:0},
R:{A:-1,R:5,N:0,D:-2,C:-3,Q:1,E:0,G:-2,H:0,I:-3,L:-2,K:2,M:-1,F:-3,P:-2,S:-1,T:-1,W:-3,Y:-2,V:-3},
N:{A:-2,R:0,N:6,D:1,C:-3,Q:0,E:0,G:0,H:1,I:-3,L:-3,K:0,M:-2,F:-3,P:-2,S:1,T:0,W:-4,Y:-2,V:-3},
D:{A:-2,R:-2,N:1,D:6,C:-3,Q:0,E:2,G:-1,H:-1,I:-3,L:-4,K:-1,M:-3,F:-3,P:-1,S:0,T:-1,W:-4,Y:-3,V:-3},
C:{A:0,R:-3,N:-3,D:-3,C:9,Q:-3,E:-4,G:-3,H:-3,I:-1,L:-1,K:-3,M:-1,F:-2,P:-3,S:-1,T:-1,W:-2,Y:-2,V:-1},
Q:{A:-1,R:1,N:0,D:0,C:-3,Q:5,E:2,G:-2,H:0,I:-3,L:-2,K:1,M:0,F:-3,P:-1,S:0,T:-1,W:-2,Y:-1,V:-2},
E:{A:-1,R:0,N:0,D:2,C:-4,Q:2,E:5,G:-2,H:0,I:-3,L:-3,K:1,M:-2,F:-3,P:-1,S:0,T:-1,W:-3,Y:-2,V:-2},
G:{A:0,R:-2,N:0,D:-1,C:-3,Q:-2,E:-2,G:6,H:-2,I:-4,L:-4,K:-2,M:-3,F:-3,P:-2,S:0,T:-2,W:-2,Y:-3,V:-3},
H:{A:-2,R:0,N:1,D:-1,C:-3,Q:0,E:0,G:-2,H:8,I:-3,L:-3,K:-1,M:-2,F:-1,P:-2,S:-1,T:-2,W:-2,Y:2,V:-3},
I:{A:-1,R:-3,N:-3,D:-3,C:-1,Q:-3,E:-3,G:-4,H:-3,I:4,L:2,K:-3,M:1,F:0,P:-3,S:-2,T:-1,W:-3,Y:-1,V:3},
L:{A:-1,R:-2,N:-3,D:-4,C:-1,Q:-2,E:-3,G:-4,H:-3,I:2,L:4,K:-2,M:2,F:0,P:-3,S:-2,T:-1,W:-2,Y:-1,V:1},
K:{A:-1,R:2,N:0,D:-1,C:-3,Q:1,E:1,G:-2,H:-1,I:-3,L:-2,K:5,M:-1,F:-3,P:-1,S:0,T:-1,W:-3,Y:-2,V:-2},
M:{A:-1,R:-1,N:-2,D:-3,C:-1,Q:0,E:-2,G:-3,H:-2,I:1,L:2,K:-1,M:5,F:0,P:-2,S:-1,T:-1,W:-1,Y:-1,V:1},
F:{A:-2,R:-3,N:-3,D:-3,C:-2,Q:-3,E:-3,G:-3,H:-1,I:0,L:0,K:-3,M:0,F:6,P:-4,S:-2,T:-2,W:1,Y:3,V:-1},
P:{A:-1,R:-2,N:-2,D:-1,C:-3,Q:-1,E:-1,G:-2,H:-2,I:-3,L:-3,K:-1,M:-2,F:-4,P:7,S:-1,T:-1,W:-4,Y:-3,V:-2},
S:{A:1,R:-1,N:1,D:0,C:-1,Q:0,E:0,G:0,H:-1,I:-2,L:-2,K:0,M:-1,F:-2,P:-1,S:4,T:1,W:-3,Y:-2,V:-2},
T:{A:0,R:-1,N:0,D:-1,C:-1,Q:-1,E:-1,G:-2,H:-2,I:-1,L:-1,K:-1,M:-1,F:-2,P:-1,S:1,T:5,W:-2,Y:-2,V:0},
W:{A:-3,R:-3,N:-4,D:-4,C:-2,Q:-2,E:-3,G:-2,H:-2,I:-3,L:-2,K:-3,M:-1,F:1,P:-4,S:-3,T:-2,W:11,Y:2,V:-3},
Y:{A:-2,R:-2,N:-2,D:-3,C:-2,Q:-1,E:-2,G:-3,H:2,I:-1,L:-1,K:-2,M:-1,F:3,P:-3,S:-2,T:-2,W:2,Y:7,V:-1},
V:{A:0,R:-3,N:-3,D:-3,C:-1,Q:-2,E:-2,G:-3,H:-3,I:3,L:1,K:-2,M:1,F:-1,P:-2,S:-2,T:0,W:-3,Y:-1,V:4},
};
// PAM250 matrix (used by LOCA, LAFF)
const PAM250: Record<string, Record<string, number>> = {
A:{A:2,R:-2,N:0,D:0,C:-2,Q:0,E:0,G:1,H:-1,I:-1,L:-2,K:-1,M:-1,F:-3,P:1,S:1,T:1,W:-6,Y:-3,V:0},
R:{A:-2,R:6,N:0,D:-1,C:-4,Q:1,E:-1,G:-3,H:2,I:-2,L:-3,K:3,M:0,F:-4,P:0,S:0,T:-1,W:2,Y:-4,V:-2},
N:{A:0,R:0,N:2,D:2,C:-4,Q:1,E:1,G:0,H:2,I:-2,L:-3,K:1,M:-2,F:-3,P:0,S:1,T:0,W:-4,Y:-2,V:-2},
D:{A:0,R:-1,N:2,D:4,C:-5,Q:2,E:3,G:1,H:1,I:-2,L:-4,K:0,M:-3,F:-6,P:-1,S:0,T:0,W:-7,Y:-4,V:-2},
C:{A:-2,R:-4,N:-4,D:-5,C:12,Q:-5,E:-5,G:-3,H:-3,I:-2,L:-6,K:-5,M:-5,F:-4,P:-3,S:0,T:-2,W:-8,Y:0,V:-2},
Q:{A:0,R:1,N:1,D:2,C:-5,Q:4,E:2,G:-1,H:3,I:-2,L:-2,K:1,M:-1,F:-5,P:0,S:-1,T:-1,W:-5,Y:-4,V:-2},
E:{A:0,R:-1,N:1,D:3,C:-5,Q:2,E:4,G:0,H:1,I:-2,L:-3,K:0,M:-2,F:-5,P:-1,S:0,T:0,W:-7,Y:-4,V:-2},
G:{A:1,R:-3,N:0,D:1,C:-3,Q:-1,E:0,G:5,H:-2,I:-3,L:-4,K:-2,M:-3,F:-5,P:0,S:1,T:0,W:-7,Y:-5,V:-1},
H:{A:-1,R:2,N:2,D:1,C:-3,Q:3,E:1,G:-2,H:6,I:-2,L:-2,K:0,M:-2,F:-2,P:0,S:-1,T:-1,W:-3,Y:0,V:-2},
I:{A:-1,R:-2,N:-2,D:-2,C:-2,Q:-2,E:-2,G:-3,H:-2,I:5,L:2,K:-2,M:2,F:1,P:-2,S:-1,T:0,W:-5,Y:-1,V:4},
L:{A:-2,R:-3,N:-3,D:-4,C:-6,Q:-2,E:-3,G:-4,H:-2,I:2,L:6,K:-3,M:4,F:2,P:-3,S:-3,T:-2,W:-2,Y:-1,V:2},
K:{A:-1,R:3,N:1,D:0,C:-5,Q:1,E:0,G:-2,H:0,I:-2,L:-3,K:5,M:0,F:-5,P:-1,S:0,T:0,W:-3,Y:-4,V:-2},
M:{A:-1,R:0,N:-2,D:-3,C:-5,Q:-1,E:-2,G:-3,H:-2,I:2,L:4,K:0,M:6,F:0,P:-2,S:-2,T:-1,W:-4,Y:-2,V:2},
F:{A:-3,R:-4,N:-3,D:-6,C:-4,Q:-5,E:-5,G:-5,H:-2,I:1,L:2,K:-5,M:0,F:9,P:-5,S:-3,T:-3,W:0,Y:7,V:-1},
P:{A:1,R:0,N:0,D:-1,C:-3,Q:0,E:-1,G:0,H:0,I:-2,L:-3,K:-1,M:-2,F:-5,P:6,S:1,T:0,W:-6,Y:-5,V:-1},
S:{A:1,R:0,N:1,D:0,C:0,Q:-1,E:0,G:1,H:-1,I:-1,L:-3,K:0,M:-2,F:-3,P:1,S:2,T:1,W:-2,Y:-3,V:-1},
T:{A:1,R:-1,N:0,D:0,C:-2,Q:-1,E:0,G:0,H:-1,I:0,L:-2,K:0,M:-1,F:-3,P:0,S:1,T:3,W:-5,Y:-3,V:0},
W:{A:-6,R:2,N:-4,D:-7,C:-8,Q:-5,E:-7,G:-7,H:-3,I:-5,L:-2,K:-3,M:-4,F:0,P:-6,S:-2,T:-5,W:17,Y:0,V:-6},
Y:{A:-3,R:-4,N:-2,D:-4,C:0,Q:-4,E:-4,G:-5,H:0,I:-1,L:-1,K:-4,M:-2,F:7,P:-5,S:-3,T:-3,W:0,Y:10,V:-2},
V:{A:0,R:-2,N:-2,D:-2,C:-2,Q:-2,E:-2,G:-1,H:-2,I:4,L:2,K:-2,M:2,F:-1,P:-1,S:-1,T:0,W:-6,Y:-2,V:4},
};
function parseFasta(input: string): [string, string][] {
const result: [string, string][] = [];
let name = '', seq = '';
for (const line of input.trim().split('\n')) {
if (line.startsWith('>')) {
if (name) result.push([name, seq]);
name = line.slice(1).trim(); seq = '';
} else {
seq += line.trim();
}
}
if (name) result.push([name, seq]);
return result;
}
function revComp(dna: string): string {
const comp: Record<string, string> = { A:'T', T:'A', C:'G', G:'C' };
return dna.split('').reverse().map(c => comp[c] ?? c).join('');
}
function factorial(n: bigint): bigint {
let r = 1n;
for (let i = 2n; i <= n; i++) r *= i;
return r;
}
function choose(n: bigint, k: bigint): bigint {
if (k > n || k < 0n) return 0n;
if (k === 0n || k === n) return 1n;
let r = 1n;
for (let i = 0n; i < k; i++) r = r * (n - i) / (i + 1n);
return r;
}
function translateRNA(rna: string): string {
let protein = '';
for (let i = 0; i + 2 < rna.length; i += 3) {
const aa = CODON_TABLE[rna.slice(i, i + 3)];
if (!aa || aa === '*') break;
protein += aa;
}
return protein;
}
Count occurrences of each nucleotide in a DNA string.
function dna(s: string): string {
let A = 0, C = 0, G = 0, T = 0;
for (const c of s) {
if (c === 'A') A++;
else if (c === 'C') C++;
else if (c === 'G') G++;
else if (c === 'T') T++;
}
return `${A} ${C} ${G} ${T}`;
}
// dna("AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC")
// => "20 12 17 21"
Replace every T with U.
function rna(s: string): string {
return s.replaceAll('T', 'U');
}
Return the reverse complement of a DNA string.
function revc(s: string): string {
return revComp(s);
}
Given n months and k offspring per pair, return the total number of rabbit pairs after n months.
function fib(n: number, k: number): bigint {
if (n <= 2) return 1n;
let a = 1n, b = 1n;
for (let i = 2; i < n; i++) {
[a, b] = [b, b + BigInt(k) * a];
}
return b;
}
// fib(5, 3) => 19n
Find the FASTA record with the highest GC content and print its name and percentage.
function gc(input: string): string {
const seqs = parseFasta(input);
let bestName = '', bestGC = -1;
for (const [name, seq] of seqs) {
const count = [...seq].filter(c => c === 'G' || c === 'C').length;
const pct = (count / seq.length) * 100;
if (pct > bestGC) { bestGC = pct; bestName = name; }
}
return `${bestName}\n${bestGC.toFixed(6)}`;
}
Count positions where two equal-length strings differ (Hamming distance).
function hamm(s: string, t: string): number {
return [...s].filter((c, i) => c !== t[i]).length;
}
Given k homozygous dominant, m heterozygous, and n homozygous recessive organisms, find the probability that two randomly chosen parents produce a dominant-phenotype offspring.
function iprb(k: number, m: number, n: number): number {
const total = k + m + n;
const t = total * (total - 1);
// P(recessive offspring)
const rr = n * (n - 1); // nn × nn → all recessive
const mr = 2 * m * n; // Mm × nn → 1/2 recessive
const mm = m * (m - 1); // Mm × Mm → 1/4 recessive
return 1 - (rr + mr * 0.5 + mm * 0.25) / t;
}
// iprb(2, 2, 2) => 0.7833...
Translate an RNA string into a protein string using the standard codon table.
function prot(rna: string): string {
return translateRNA(rna);
}
Find all 1-based positions where pattern t occurs in string s.
function subs(s: string, t: string): string {
const positions: number[] = [];
for (let i = 0; i <= s.length - t.length; i++) {
if (s.slice(i, i + t.length) === t) positions.push(i + 1);
}
return positions.join(' ');
}
Build a profile matrix and consensus string from a set of aligned DNA sequences.
function cons(input: string): string {
const seqs = parseFasta(input).map(([, s]) => s);
const n = seqs[0].length;
const profile: Record<string, number[]> = {
A: new Array(n).fill(0), C: new Array(n).fill(0),
G: new Array(n).fill(0), T: new Array(n).fill(0),
};
for (const seq of seqs)
for (let i = 0; i < n; i++) profile[seq[i]][i]++;
const consensus = Array.from({ length: n }, (_, i) =>
(['A','C','G','T'] as const).reduce((best, b) =>
profile[b][i] > profile[best][i] ? b : best, 'A' as string)
).join('');
return [
consensus,
...(['A','C','G','T'] as const).map(b => `${b}: ${profile[b].join(' ')}`),
].join('\n');
}
Rabbits die after m months. Return the total number of pairs after n months.
function fibd(n: number, m: number): bigint {
// ages[i] = pairs that are (i+1) months old
let ages = new Array(m).fill(0n);
ages[0] = 1n;
for (let t = 1; t < n; t++) {
const next = new Array(m).fill(0n);
// Newborns = sum of all adults (age >= 2, i.e. index >= 1)
next[0] = ages.slice(1).reduce((a, b) => a + b, 0n);
// Survivors shift right; pairs at index m-1 die
for (let i = 1; i < m; i++) next[i] = ages[i - 1];
ages = next;
}
return ages.reduce((a, b) => a + b, 0n);
}
// fibd(6, 3) => 4n
Find all pairs (A, B) where the last 3 characters of A equal the first 3 of B, and A ≠ B.
function grph(input: string): string {
const seqs = parseFasta(input);
const results: string[] = [];
for (const [n1, s1] of seqs) {
const suffix = s1.slice(-3);
for (const [n2, s2] of seqs) {
if (n1 !== n2 && s2.startsWith(suffix))
results.push(`${n1} ${n2}`);
}
}
return results.join('\n');
}
Given counts of 6 genotype-pair types, compute the expected number of dominant-phenotype offspring (2 children per couple).
function iev(counts: number[]): number {
// AA×AA, AA×Aa, AA×aa, Aa×Aa, Aa×aa, aa×aa
const probs = [1.0, 1.0, 1.0, 0.75, 0.5, 0.0];
return counts.reduce((sum, c, i) => sum + c * 2 * probs[i], 0);
}
Find the longest substring shared by all sequences in a FASTA file.
function lcsm(input: string): string {
const seqs = parseFasta(input).map(([, s]) => s);
const shortest = seqs.reduce((a, b) => a.length <= b.length ? a : b);
for (let len = shortest.length; len >= 1; len--) {
for (let i = 0; i <= shortest.length - len; i++) {
const sub = shortest.slice(i, i + len);
if (seqs.every(s => s.includes(sub))) return sub;
}
}
return '';
}
Probability that among 2^k offspring in the k-th generation (always breeding AaBb × AaBb), at least N are AaBb.
function lia(k: number, N: number): number {
const total = 1 << k; // 2^k offspring
const p = 0.25; // P(AaBb) per offspring
// P(X >= N) where X ~ Binomial(total, p)
let prob = 0;
for (let j = N; j <= total; j++) {
// log-space computation to avoid overflow
let logC = 0;
for (let i = 0; i < j; i++)
logC += Math.log(total - i) - Math.log(i + 1);
prob += Math.exp(logC + j * Math.log(p) + (total - j) * Math.log(1 - p));
}
return Math.round(prob * 1e3) / 1e3;
}
Find the N-glycosylation motif N{P}[ST]{P} in protein sequences fetched from UniProt.
// The N-glycosylation motif: N, not-P, S or T, not-P
const NGLYC = /(?=(N[^P][ST][^P]))/g;
async function mprt(ids: string[]): Promise<string> {
const results: string[] = [];
for (const id of ids) {
const baseId = id.split('_')[0]; // handle isoform IDs
const url = `https://www.uniprot.org/uniprot/${baseId}.fasta`;
const res = await fetch(url);
const text = await res.text();
const seq = parseFasta(text)[0]?.[1] ?? '';
const positions: number[] = [];
let m: RegExpExecArray | null;
NGLYC.lastIndex = 0;
while ((m = NGLYC.exec(seq)) !== null) positions.push(m.index + 1);
if (positions.length > 0) {
results.push(id);
results.push(positions.join(' '));
}
}
return results.join('\n');
}
Count the number of mRNA strings that could encode a given protein, modulo 1,000,000.
function mrna(protein: string): number {
const codonCount: Record<string, number> = {};
for (const aa of Object.values(CODON_TABLE))
codonCount[aa] = (codonCount[aa] ?? 0) + 1;
const MOD = 1_000_000;
let result = codonCount['*']; // stop codons
for (const aa of protein)
result = (result * codonCount[aa]) % MOD;
return result;
}
Find all distinct proteins that can be translated from ORFs in both strands of a DNA string.
function orf(input: string): string {
const [, dna] = parseFasta(input)[0];
const strands = [dna.replaceAll('T','U'), revComp(dna).replaceAll('T','U')];
const proteins = new Set<string>();
for (const rna of strands) {
for (let i = 0; i < rna.length - 2; i++) {
if (rna.slice(i, i+3) === 'AUG') {
let p = '';
for (let j = i; j + 2 < rna.length; j += 3) {
const aa = CODON_TABLE[rna.slice(j, j+3)];
if (!aa || aa === '*') break;
p += aa;
}
if (p) proteins.add(p);
}
}
}
return [...proteins].join('\n');
}
List all permutations of the integers 1..n in lexicographic order.
function perm(n: number): string {
const arr = Array.from({ length: n }, (_, i) => i + 1);
const result: number[][] = [];
function permute(start: number) {
if (start === n) { result.push([...arr]); return; }
for (let i = start; i < n; i++) {
[arr[start], arr[i]] = [arr[i], arr[start]];
permute(start + 1);
[arr[start], arr[i]] = [arr[i], arr[start]];
}
}
permute(0);
result.sort((a, b) => { for (let i = 0; i < n; i++) if (a[i] !== b[i]) return a[i] - b[i]; return 0; });
return `${result.length}\n${result.map(p => p.join(' ')).join('\n')}`;
}
Compute the monoisotopic mass of a protein string.
function prtm(protein: string): string {
const mass = [...protein].reduce((s, aa) => s + (MASS[aa] ?? 0), 0);
return mass.toFixed(3);
}
Find all positions and lengths of palindromic (reverse-complement-equal) substrings of length 4–12.
function revp(input: string): string {
const [, dna] = parseFasta(input)[0];
const out: string[] = [];
for (let i = 0; i < dna.length; i++) {
for (let len = 4; len <= 12; len += 2) {
if (i + len > dna.length) break;
const sub = dna.slice(i, i + len);
if (sub === revComp(sub)) out.push(`${i+1} ${len}`);
}
}
return out.join('\n');
}
Remove intron sequences from a DNA string, then translate to protein.
function splc(input: string): string {
const seqs = parseFasta(input);
let dna = seqs[0][1];
for (let i = 1; i < seqs.length; i++) dna = dna.replaceAll(seqs[i][1], '');
return translateRNA(dna.replaceAll('T', 'U'));
}
Given an ordered alphabet and length k, enumerate all k-mers in lexicographic order.
function lexf(alphabet: string[], k: number): string {
const results: string[] = [];
function gen(s: string) {
if (s.length === k) { results.push(s); return; }
for (const c of alphabet) gen(s + c);
}
gen('');
return results.join('\n');
}
Find both the longest increasing and longest decreasing subsequences of a permutation.
function lgis(perm: number[]): string {
function lis(arr: number[], increasing: boolean): number[] {
const n = arr.length;
const dp = new Array(n).fill(1);
const parent = new Array(n).fill(-1);
for (let i = 1; i < n; i++)
for (let j = 0; j < i; j++)
if ((increasing ? arr[j] < arr[i] : arr[j] > arr[i]) && dp[j]+1 > dp[i]) {
dp[i] = dp[j] + 1; parent[i] = j;
}
let maxI = dp.indexOf(Math.max(...dp));
const result: number[] = [];
for (let i = maxI; i !== -1; i = parent[i]) result.unshift(arr[i]);
return result;
}
return `${lis(perm, true).join(' ')}\n${lis(perm, false).join(' ')}`;
}
Greedily merge the pair of sequences with maximum overlap until one sequence remains.
function long(input: string): string {
let seqs = parseFasta(input).map(([, s]) => s);
function overlap(a: string, b: string): number {
const max = Math.min(a.length, b.length);
for (let l = max; l > 0; l--)
if (a.endsWith(b.slice(0, l))) return l;
return 0;
}
while (seqs.length > 1) {
let bi = 0, bj = 1, bov = -1;
for (let i = 0; i < seqs.length; i++)
for (let j = 0; j < seqs.length; j++)
if (i !== j) { const ov = overlap(seqs[i], seqs[j]); if (ov > bov) { bov = ov; bi = i; bj = j; } }
const merged = seqs[bi] + seqs[bj].slice(bov);
seqs = seqs.filter((_, i) => i !== bi && i !== bj);
seqs.push(merged);
}
return seqs[0];
}
Count the number of perfect matchings (A–U and C–G) in an RNA string where nA = nU and nC = nG.
function pmch(input: string): string {
const [, rna] = parseFasta(input)[0];
const nA = BigInt([...rna].filter(c => c === 'A').length);
const nC = BigInt([...rna].filter(c => c === 'C').length);
return (factorial(nA) * factorial(nC)).toString();
}
Compute P(n, k) = n! / (n−k)! modulo 1,000,000.
function pper(n: number, k: number): number {
const MOD = 1_000_000n;
let r = 1n;
for (let i = 0; i < k; i++) r = r * BigInt(n - i) % MOD;
return Number(r);
}
For each GC content value, compute the log₁₀ probability that a random string with that GC content equals s.
function prob(s: string, gcArr: number[]): string {
return gcArr.map(x => {
let lp = 0;
for (const c of s) {
lp += c === 'G' || c === 'C'
? Math.log10(x / 2)
: Math.log10((1 - x) / 2);
}
return lp.toFixed(3);
}).join(' ');
}
List all signed permutations of 1..n with all sign combinations.
function sign(n: number): string {
const arr = Array.from({ length: n }, (_, i) => i + 1);
const result: string[] = [];
function permute(start: number) {
if (start === n) {
for (let mask = 0; mask < (1 << n); mask++) {
result.push(arr.map((v, i) => ((mask >> i) & 1) ? -v : v).join(' '));
}
return;
}
for (let i = start; i < n; i++) {
[arr[start], arr[i]] = [arr[i], arr[start]];
permute(start + 1);
[arr[start], arr[i]] = [arr[i], arr[start]];
}
}
permute(0);
return `${result.length}\n${result.join('\n')}`;
}
Find the 1-based positions in s at which the characters of t appear as a subsequence.
function sseq(s: string, t: string): string {
const positions: number[] = [];
let j = 0;
for (let i = 0; i < s.length && j < t.length; i++)
if (s[i] === t[j]) { positions.push(i + 1); j++; }
return positions.join(' ');
}
Return the ratio of transitions to transversions between two aligned DNA strings.
function tran(s1: string, s2: string): number {
const purines = new Set(['A','G']);
let ts = 0, tv = 0;
for (let i = 0; i < s1.length; i++) {
if (s1[i] !== s2[i]) {
(purines.has(s1[i]) === purines.has(s2[i])) ? ts++ : tv++;
}
}
return ts / tv;
}
Given n nodes and edges of a forest, return how many edges are needed to make it a spanning tree.
function tree(n: number, edges: [number, number][]): number {
const parent = Array.from({ length: n + 1 }, (_, i) => i);
function find(x: number): number {
if (parent[x] !== x) parent[x] = find(parent[x]);
return parent[x];
}
for (const [u, v] of edges) parent[find(u)] = find(v);
const components = new Set(
Array.from({ length: n }, (_, i) => find(i + 1))
).size;
return components - 1;
}
Count non-crossing perfect matchings of an RNA string (A pairs with U, C pairs with G) using interval DP.
function cat(input: string): bigint {
const [, rna] = parseFasta(input)[0];
const n = rna.length;
const memo = new Map<number, bigint>();
function canPair(a: string, b: string) {
return (a==='A'&&b==='U')||(a==='U'&&b==='A')||(a==='C'&&b==='G')||(a==='G'&&b==='C');
}
function dp(i: number, j: number): bigint {
if (j < i) return 1n;
if (i === j) return 0n;
if ((j - i) % 2 === 0) return 0n; // odd length can't be perfectly matched
const key = i * n + j;
if (memo.has(key)) return memo.get(key)!;
let res = 0n;
for (let k = i + 1; k <= j; k += 2)
if (canPair(rna[i], rna[k])) res += dp(i+1, k-1) * dp(k+1, j);
memo.set(key, res);
return res;
}
return dp(0, n - 1);
}
Identify “incorrect” reads (appearing once) and correct each one to the nearest “correct” read (Hamming distance 1).
function corr(input: string): string {
const reads = parseFasta(input).map(([, s]) => s);
const counts = new Map<string, number>();
for (const r of reads) {
const key = r < revComp(r) ? r : revComp(r);
counts.set(key, (counts.get(key) ?? 0) + 1);
}
const correct = new Set<string>();
for (const [key, c] of counts)
if (c >= 2) { correct.add(key); correct.add(revComp(key)); }
const out: string[] = [];
for (const r of reads) {
if (!correct.has(r)) {
for (const c of correct) {
if (c.length === r.length && [...r].filter((ch,i) => ch !== c[i]).length === 1) {
out.push(`${r}->${c}`); break;
}
}
}
}
return out.join('\n');
}
A rooted binary tree with n leaves has exactly n − 1 internal nodes.
function inod(n: number): number { return n - 1; }
Count all 4-mers in lexicographic order in a DNA string (from a FASTA file).
function kmer(input: string): string {
const [, dna] = parseFasta(input)[0];
const bases = ['A','C','G','T'];
const kmers: string[] = [];
function gen(s: string) {
if (s.length === 4) { kmers.push(s); return; }
for (const b of bases) gen(s + b);
}
gen('');
const counts: Record<string, number> = {};
for (const k of kmers) counts[k] = 0;
for (let i = 0; i <= dna.length - 4; i++) counts[dna.slice(i,i+4)]++;
return kmers.map(k => counts[k]).join(' ');
}
Build the KMP failure function array for a string (given in FASTA format).
function kmp(pattern: string): string {
const n = pattern.length;
const fail = new Array(n).fill(0);
let k = 0;
for (let i = 1; i < n; i++) {
while (k > 0 && pattern[k] !== pattern[i]) k = fail[k - 1];
if (pattern[k] === pattern[i]) k++;
fail[i] = k;
}
return fail.join(' ');
}
Find the longest common subsequence of two DNA strings.
function lcsq(s: string, t: string): string {
const m = s.length, n = t.length;
const dp: number[][] = Array.from({ length: m+1 }, () => new Array(n+1).fill(0));
for (let i = 1; i <= m; i++)
for (let j = 1; j <= n; j++)
dp[i][j] = s[i-1] === t[j-1]
? dp[i-1][j-1] + 1
: Math.max(dp[i-1][j], dp[i][j-1]);
let i = m, j = n, result = '';
while (i > 0 && j > 0) {
if (s[i-1] === t[j-1]) { result = s[i-1] + result; i--; j--; }
else if (dp[i-1][j] >= dp[i][j-1]) i--;
else j--;
}
return result;
}
Given an ordered alphabet and max length n, enumerate all non-empty strings up to length n.
function lexv(alphabet: string[], n: number): string {
const results: string[] = [];
function gen(s: string) {
results.push(s);
if (s.length === n) return;
for (const c of alphabet) gen(s + c);
}
for (const c of alphabet) gen(c);
return results.join('\n');
}
Count the number of maximum matchings (A–U and C–G) in an RNA string.
function mmch(input: string): string {
const [, rna] = parseFasta(input)[0];
const nA = BigInt([...rna].filter(c => c === 'A').length);
const nU = BigInt([...rna].filter(c => c === 'U').length);
const nC = BigInt([...rna].filter(c => c === 'C').length);
const nG = BigInt([...rna].filter(c => c === 'G').length);
const auMin = nA < nU ? nA : nU;
const auMax = nA > nU ? nA : nU;
const cgMin = nC < nG ? nC : nG;
const cgMax = nC > nG ? nC : nG;
return (choose(auMax, auMin) * factorial(auMin) * choose(cgMax, cgMin) * factorial(cgMin)).toString();
}
Build a p-distance matrix (fraction of differing positions) for a set of aligned DNA strings.
function pdst(input: string): string {
const seqs = parseFasta(input).map(([, s]) => s);
return seqs.map(s =>
seqs.map(t => ([...s].filter((c,i) => c !== t[i]).length / s.length).toFixed(5)).join(' ')
).join('\n');
}
Compute the minimum number of reversals to transform one unsigned permutation into another, using BFS.
function rear(input: string): string {
const lines = input.trim().split('\n').filter(l => l.trim());
const dists: number[] = [];
for (let i = 0; i < lines.length; i += 2) {
const p = lines[i].trim().split(/\s+/).map(Number);
const q = lines[i+1].trim().split(/\s+/).map(Number);
dists.push(revDist(p, q));
}
return dists.join(' ');
}
function revDist(p: number[], q: number[]): number {
const n = p.length;
// Map q to identity
const inv = new Array(n+1);
q.forEach((v, i) => { inv[v] = i+1; });
const start = p.map(v => inv[v]);
const ident = Array.from({ length: n }, (_, i) => i+1);
const startKey = start.join(',');
const identKey = ident.join(',');
if (startKey === identKey) return 0;
const visited = new Map<string, number>([[startKey, 0]]);
const queue: [number[], number][] = [[start, 0]];
while (queue.length) {
const [cur, d] = queue.shift()!;
for (let i = 0; i < n; i++) {
for (let j = i+1; j < n; j++) {
const nxt = [...cur];
let l = i, r = j;
while (l < r) { [nxt[l], nxt[r]] = [nxt[r], nxt[l]]; l++; r--; }
const k = nxt.join(',');
if (k === identKey) return d+1;
if (!visited.has(k)) { visited.set(k, d+1); queue.push([nxt, d+1]); }
}
}
}
return -1;
}
Given N random DNA strings of the same length as s with GC-content x, compute the probability at least one equals s.
function rstr(N: number, x: number, s: string): number {
const pMatch = [...s].reduce((p, c) =>
p * (c === 'G' || c === 'C' ? x/2 : (1-x)/2), 1);
return 1 - Math.pow(1 - pMatch, N);
}
Return 2^n mod 1,000,000.
function sset(n: number): number {
const MOD = 1_000_000n;
let r = 1n;
for (let i = 0; i < n; i++) r = r * 2n % MOD;
return Number(r);
}
Compute the sum of C(n, k) for k from m to n, modulo 1,000,000.
function aspc(n: number, m: number): number {
const MOD = 1_000_000n;
let sum = 0n;
let c = 1n;
for (let k = 0; k <= n; k++) {
if (k >= m) sum = (sum + c) % MOD;
if (k < n) c = c * BigInt(n - k) / BigInt(k + 1);
}
return Number(sum);
}
Compute the minimum edit distance (Levenshtein) between two strings.
function edit(s: string, t: string): number {
const m = s.length, n = t.length;
const dp: number[][] = Array.from({ length: m+1 }, (_, i) => Array.from({ length: n+1 }, (_, j) => i||j ? (i?i:j) : 0));
for (let i = 0; i <= m; i++) dp[i][0] = i;
for (let j = 0; j <= n; j++) dp[0][j] = j;
for (let i = 1; i <= m; i++)
for (let j = 1; j <= n; j++)
dp[i][j] = s[i-1] === t[j-1]
? dp[i-1][j-1]
: 1 + Math.min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1]);
return dp[m][n];
}
For each GC content value, compute the expected number of occurrences of a DNA string s in a random string of length n.
function evalProb(n: number, s: string, gcArr: number[]): string {
return gcArr.map(x => {
const pSite = [...s].reduce((p, c) =>
p * (c === 'G' || c === 'C' ? x/2 : (1-x)/2), 1);
return ((n - s.length + 1) * pSite).toFixed(6);
}).join(' ');
}
Count non-crossing partial matchings (A–U, C–G; unpaired bases allowed) modulo 1,000,000.
function motz(input: string): number {
const [, rna] = parseFasta(input)[0];
const n = rna.length;
const MOD = 1_000_000n;
const memo = new Map<number, bigint>();
function canPair(a: string, b: string) {
return (a==='A'&&b==='U')||(a==='U'&&b==='A')||(a==='C'&&b==='G')||(a==='G'&&b==='C');
}
function dp(i: number, j: number): bigint {
if (j < i) return 1n;
if (i === j) return 1n; // single base is unpaired
const key = i * (n+1) + j;
if (memo.has(key)) return memo.get(key)!;
// base i is unpaired
let res = dp(i+1, j) % MOD;
// base i pairs with k
for (let k = i+1; k <= j; k++)
if (canPair(rna[i], rna[k]))
res = (res + dp(i+1, k-1) * dp(k+1, j)) % MOD;
memo.set(key, res);
return res;
}
return Number(dp(0, n-1));
}
Parse Newick-format trees and compute distances between named leaf nodes.
interface NwNode { name: string; children: { node: NwNode; w: number }[]; }
function parseNewick(s: string): NwNode {
s = s.trim().replace(/;$/, '');
let i = 0;
function parse(): NwNode {
const node: NwNode = { name: '', children: [] };
if (s[i] === '(') {
i++;
while (s[i] !== ')') {
if (s[i] === ',') i++;
const child = parse();
let w = 1;
if (s[i] === ':') { i++; let ws = ''; while (i < s.length && /[\d.]/.test(s[i])) ws += s[i++]; w = parseFloat(ws)||1; }
node.children.push({ node: child, w });
}
i++; // skip ')'
}
let name = '';
while (i < s.length && !'(),:;'.includes(s[i])) name += s[i++];
node.name = name;
return node;
}
return parse();
}
function nwckDist(root: NwNode, a: string, b: string): number {
function contains(n: NwNode, t: string): boolean {
return n.name === t || n.children.some(({node}) => contains(node, t));
}
function distFrom(n: NwNode, t: string): number {
if (n.name === t) return 0;
for (const {node, w} of n.children) {
const d = distFrom(node, t);
if (d >= 0) return d + w;
}
return -1;
}
function solve(n: NwNode, a: string, b: string): number {
const ha = contains(n, a), hb = contains(n, b);
if (!ha || !hb) return -1;
for (const {node} of n.children) {
const r = solve(node, a, b);
if (r >= 0) return r;
}
// this node is the LCA
return distFrom(n, a) + distFrom(n, b);
}
return solve(root, a, b);
}
function nwck(input: string): string {
const blocks = input.trim().split('\n\n');
return blocks.map(block => {
const lines = block.trim().split('\n');
const tree = parseNewick(lines[0]);
return lines.slice(1).map(l => {
const [a, b] = l.split(' ');
return nwckDist(tree, a, b);
}).join(' ');
}).join('\n');
}
Find the shortest common supersequence of two strings.
function scsp(s: string, t: string): string {
// SCS = s + t - LCS
const m = s.length, n = t.length;
const dp: number[][] = Array.from({ length: m+1 }, (_, i) =>
Array.from({ length: n+1 }, (_, j) => i + j));
for (let i = 1; i <= m; i++)
for (let j = 1; j <= n; j++)
dp[i][j] = s[i-1] === t[j-1]
? dp[i-1][j-1] + 1
: 1 + Math.min(dp[i-1][j], dp[i][j-1]);
let i = m, j = n, result = '';
while (i > 0 && j > 0) {
if (s[i-1] === t[j-1]) { result = s[i-1] + result; i--; j--; }
else if (dp[i-1][j] < dp[i][j-1]) { result = s[i-1] + result; i--; }
else { result = t[j-1] + result; j--; }
}
while (i > 0) { result = s[i-1] + result; i--; }
while (j > 0) { result = t[j-1] + result; j--; }
return result;
}
Given two sets and their universe size, output union, intersection, difference, complement of each.
function seto(n: number, aArr: number[], bArr: number[]): string {
const universe = new Set(Array.from({length:n},(,i)=>i+1));
const A = new Set(aArr), B = new Set(bArr);
const union = [...new Set([...A,...B])].sort((a,b)=>a-b);
const inter = aArr.filter(x=>B.has(x)).sort((a,b)=>a-b);
const aMinB = aArr.filter(x=>!B.has(x)).sort((a,b)=>a-b);
const bMinA = bArr.filter(x=>!A.has(x)).sort((a,b)=>a-b);
const compA = [...universe].filter(x=>!A.has(x)).sort((a,b)=>a-b);
const compB = [...universe].filter(x=>!B.has(x)).sort((a,b)=>a-b);
const fmt = (s: number[]) => `{${s.join(', ')}}`;
return [fmt(union),fmt(inter),fmt(aMinB),fmt(bMinA),fmt(compA),fmt(compB)].join('\n');
}
Sort a signed permutation using a sequence of reversals; output each intermediate permutation.
function sort(perm: number[]): string {
let p = [...perm];
const steps: string[] = [];
function applyRev(arr: number[], i: number, j: number): number[] {
const r = [...arr];
let l2 = i, r2 = j;
while (l2 < r2) { [r[l2], r[r2]] = [-r[r2], -r[l2]]; l2++; r2--; }
if (l2 === r2) r[l2] = -r[l2];
return r;
}
const identity = Array.from({length: p.length}, (_, i) => i+1);
while (!p.every((v, i) => v === identity[i])) {
// Greedy: find reversal that reduces breakpoints most
const n = p.length;
const withSent = (arr: number[]) => [0, ...arr, n+1];
function breakpoints(arr: number[]): number {
const ws = withSent(arr);
return ws.slice(0,-1).filter((v,i)=>ws[i+1]-v!==1).length;
}
let bestP = p, bestBP = breakpoints(p);
for (let i = 0; i < n; i++) {
for (let j = i; j < n; j++) {
const np = applyRev(p, i, j);
const bp = breakpoints(np);
if (bp < bestBP) { bestBP = bp; bestP = np; }
}
}
// If no improvement, just apply the first valid reversal
if (bestP === p) {
bestP = applyRev(p, 0, 0);
}
p = bestP;
steps.push(p.join(' '));
}
return `${steps.join('\n')}\n${steps.length}`;
}
Given a list of prefix masses, find the protein whose theoretical mass spectrum matches.
function spec(masses: number[]): string {
const sorted = [...masses].sort((a,b)=>a-b);
const tol = 0.001;
let protein = '';
for (let i = 1; i < sorted.length; i++) {
const diff = sorted[i] - sorted[i-1];
for (const [aa, mass] of Object.entries(MASS)) {
if (Math.abs(diff - mass) < tol) { protein += aa; break; }
}
}
return protein;
}
Build a trie from a collection of DNA strings and output edges as (parent, child, label).
function trie(patterns: string[]): string {
let nextId = 1;
const children: Map<string, number>[] = [new Map()];
for (const pat of patterns) {
let node = 0;
for (const c of pat) {
if (!children[node].has(c)) {
children[node].set(c, nextId++);
children.push(new Map());
}
node = children[node].get(c)!;
}
}
const edges: string[] = [];
for (let i = 0; i < children.length; i++)
for (const [c, child] of children[i])
edges.push(`${i+1} ${child+1} ${c}`);
return edges.join('\n');
}
Find the shift value with the highest multiplicity in the spectral convolution of two multisets.
function conv(s1: number[], s2: number[]): string {
const counts = new Map<number, number>();
for (const a of s1)
for (const b of s2) {
const diff = Math.round((a - b) * 1000) / 1000;
counts.set(diff, (counts.get(diff) ?? 0) + 1);
}
let bestShift = 0, bestCount = 0;
for (const [shift, count] of counts)
if (count > bestCount) { bestCount = count; bestShift = shift; }
return `${bestCount}\n${bestShift}`;
}
Given a tree in Newick format, output the character table (one binary vector per internal edge).
function ctbl(newickStr: string): string {
const tree = parseNewick(newickStr);
// Collect all leaf names
function leaves(n: NwNode): string[] {
if (n.children.length === 0) return [n.name];
return n.children.flatMap(({node}) => leaves(node));
}
const allLeaves = leaves(tree);
const n = allLeaves.length;
const chars: string[] = [];
function dfs(node: NwNode): Set<string> {
if (node.children.length === 0) return new Set([node.name]);
const below = new Set<string>();
for (const {node: child} of node.children) {
const sub = dfs(child);
for (const l of sub) below.add(l);
}
// Only add as a character if this is an internal node (not root, not leaf)
if (below.size > 0 && below.size < n) {
const vec = allLeaves.map(l => below.has(l) ? '1' : '0').join('');
// Normalize: ensure first char is 0 or use canonical form
if (!chars.includes(vec) && !chars.includes(vec.split('').map(c=>c==='0'?'1':'0').join('')))
chars.push(vec);
}
return below;
}
dfs(tree);
return chars.join('\n');
}
Given a set of DNA strings, construct the De Bruijn graph and output its edges.
function dbru(reads: string[]): string {
const edges = new Set<string>();
for (const r of reads) {
for (const seq of [r, revComp(r)]) {
const u = seq.slice(0, -1);
const v = seq.slice(1);
const edge = `(${u}, ${v})`;
edges.add(edge);
}
}
return [...edges].sort().join('\n');
}
Return an optimal alignment of two strings along with the edit distance.
function edta(s: string, t: string): string {
const m = s.length, n = t.length;
const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i||j?(i?i:j):0));
for (let i=0;i<=m;i++) dp[i][0]=i;
for (let j=0;j<=n;j++) dp[0][j]=j;
for (let i=1;i<=m;i++)
for (let j=1;j<=n;j++)
dp[i][j]=s[i-1]===t[j-1]?dp[i-1][j-1]:1+Math.min(dp[i-1][j],dp[i][j-1],dp[i-1][j-1]);
let i=m, j=n, sa='', ta='';
while (i>0||j>0) {
if (i>0&&j>0&&s[i-1]===t[j-1]) { sa=s[i-1]+sa; ta=t[j-1]+ta; i--;j--; }
else if (i>0&&(j===0||dp[i-1][j]<=dp[i][j-1]&&dp[i-1][j]<=dp[i-1][j-1])) { sa=s[i-1]+sa; ta='-'+ta; i--; }
else if (j>0&&(i===0||dp[i][j-1]<=dp[i-1][j]&&dp[i][j-1]<=dp[i-1][j-1])) { sa='-'+sa; ta=t[j-1]+ta; j--; }
else { sa=s[i-1]+sa; ta=t[j-1]+ta; i--;j--; }
}
return `${dp[m][n]}\n${sa}\n${ta}`;
}
Given a full spectrum (prefix + suffix masses), reconstruct the peptide.
function full(masses: number[]): string {
const sorted = [...masses].sort((a,b)=>a-b);
const total = sorted[sorted.length-1] + 18.011; // add water
const tol = 0.01;
// Prefix masses are those < total/2 (roughly)
// Differences between consecutive prefix masses give amino acids
const prefixes = sorted.filter(m => m < total/2 + 100);
prefixes.sort((a,b)=>a-b);
let protein = '';
for (let i = 1; i < prefixes.length; i++) {
const diff = prefixes[i] - prefixes[i-1];
for (const [aa, mass] of Object.entries(MASS)) {
if (Math.abs(diff - mass) < tol) { protein += aa; break; }
}
}
return protein;
}
For n chromosome pairs, compute log₁₀ P(two siblings share at least k chromosomes) for k = 1 to 2n.
function indc(n: number): string {
const total = 2 * n;
// X ~ Binomial(2n, 0.5)
// Pre-compute log of binomial PMF
const logProb = (k: number) => {
let lc = 0;
for (let i = 0; i < k; i++) lc += Math.log10(total-i) - Math.log10(i+1);
return lc + total * Math.log10(0.5);
};
const pmf: number[] = Array.from({length: total+1}, (_, k) => Math.pow(10, logProb(k)));
// CDF from right: P(X >= k)
const result: string[] = [];
let cumRight = 1.0;
for (let k = 1; k <= total; k++) {
result.push(Math.log10(cumRight).toFixed(3));
cumRight -= pmf[k];
}
return result.join(' ');
}
Check if two motifs can appear disjointly (non-overlapping) as subsequences in a string.
function itwv(s: string, motifs: string[]): string {
// For each pair (i,j), can motifs[i] and motifs[j] appear as disjoint subsequences of s?
function canInterleave(text: string, a: string, b: string): boolean {
const m = a.length, n = b.length, L = text.length;
// dp[i][j] = can we match a[0..i-1] and b[0..j-1] using text[0..?]
// Track min text position needed
const dp: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(Infinity));
dp[0][0] = 0;
for (let i = 0; i <= m; i++) {
for (let j = 0; j <= n; j++) {
if (dp[i][j] === Infinity) continue;
const pos = dp[i][j];
if (i < m) {
// Advance in text to find a[i]
let p = pos;
while (p < L && text[p] !== a[i]) p++;
if (p < L && (dp[i+1][j] === Infinity || p+1 < dp[i+1][j])) dp[i+1][j] = p+1;
}
if (j < n) {
let p = pos;
while (p < L && text[p] !== b[j]) p++;
if (p < L && (dp[i][j+1] === Infinity || p+1 < dp[i][j+1])) dp[i][j+1] = p+1;
}
}
}
return dp[m][n] < Infinity;
}
const rows: string[] = [];
for (let i = 0; i < motifs.length; i++) {
const row: string[] = [];
for (let j = 0; j < motifs.length; j++) {
row.push(i===j ? '0' : (canInterleave(s, motifs[i], motifs[j]) ? '1' : '0'));
}
rows.push(row.join(' '));
}
return rows.join('\n');
}
Find the longest substring of a DNA string that appears at least k times (using binary search + suffix array).
function lrep(dna: string, k: number): string {
const n = dna.length;
// Build suffix array (naive O(n^2 log n))
const sa = Array.from({length:n},(_,i)=>i).sort((a,b)=>dna.slice(a)<dna.slice(b)?-1:1);
// Binary search on length
function check(len: number): string {
for (let i = 0; i <= n - k; i++) {
const sub = dna.slice(sa[i], sa[i]+len);
let count = 1;
for (let j = i+1; j < n; j++) {
if (dna.slice(sa[j], sa[j]+len) === sub) count++;
else break;
}
if (count >= k) return sub;
}
return '';
}
let lo = 1, hi = Math.floor(n/k), best = '';
while (lo <= hi) {
const mid = (lo+hi)>>1;
const res = check(mid);
if (res) { best = res; lo = mid+1; } else hi = mid-1;
}
return best;
}
Same as NWCK but edge weights are used in distance computation (they are already parsed by the Newick parser above).
// Uses parseNewick and nwckDist from NWCK above — same implementation handles weighted edges.
function nkew(input: string): string {
return nwck(input); // identical logic; Newick parser already reads weights
}
Count non-crossing matchings where G can pair with U (in addition to A–U and C–G), modulo 1,000,000.
function rnas(input: string): number {
const [, rna] = parseFasta(input)[0];
const n = rna.length;
const MOD = 1_000_000n;
const memo = new Map<number, bigint>();
function canPair(a: string, b: string) {
return (a==='A'&&b==='U')||(a==='U'&&b==='A')||(a==='C'&&b==='G')||(a==='G'&&b==='C')
|| (a==='G'&&b==='U')||(a==='U'&&b==='G');
}
function dp(i: number, j: number): bigint {
if (j < i) return 1n;
if (i === j) return 1n;
const key = i*(n+1)+j;
if (memo.has(key)) return memo.get(key)!;
let res = dp(i+1, j) % MOD;
for (let k = i+1; k <= j; k++)
if (canPair(rna[i], rna[k]))
res = (res + dp(i+1,k-1)*dp(k+1,j)) % MOD;
memo.set(key, res);
return res;
}
return Number(dp(0, n-1));
}
Given the frequency q of the recessive allele, compute the frequency of heterozygous carriers under Hardy-Weinberg equilibrium.
function afrq(q: number[]): string {
return q.map(qi => {
const p = 1 - qi;
return (2 * p * qi).toFixed(3);
}).join(' ');
}
Given a multiple sequence alignment, output informative binary characters (columns with both 0s and 1s, grouping by nucleotide).
function cstr(seqs: string[]): string {
if (seqs.length === 0) return '';
const n = seqs[0].length;
const chars = new Set<string>();
for (let col = 0; col < n; col++) {
const colBases = seqs.map(s => s[col]);
const unique = [...new Set(colBases)];
if (unique.length < 2) continue;
// For each pair of bases, create a binary character
for (const base of unique) {
const vec = seqs.map(s => s[col] === base ? '1' : '0').join('');
const comp = vec.split('').map(c => c==='0'?'1':'0').join('');
if (!chars.has(vec) && !chars.has(comp) &&
vec.includes('0') && vec.includes('1'))
chars.add(vec);
}
}
return [...chars].join('\n');
}
Count the number of optimal global alignments of two strings modulo 134,217,727.
function ctea(s: string, t: string): number {
const MOD = 134_217_727;
const m = s.length, n = t.length;
const score = (a: string, b: string) => a===b ? 0 : 1;
const GAP = 1;
// dp[i][j] = edit distance
const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i+j));
for (let i=1;i<=m;i++) for (let j=1;j<=n;j++)
dp[i][j]=s[i-1]===t[j-1]?dp[i-1][j-1]:1+Math.min(dp[i-1][j],dp[i][j-1],dp[i-1][j-1]);
// count[i][j] = number of ways to achieve dp[i][j]
const cnt: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(0));
cnt[0][0]=1;
for (let j=1;j<=n;j++) cnt[0][j]=1;
for (let i=1;i<=m;i++) cnt[i][0]=1;
for (let i=1;i<=m;i++)
for (let j=1;j<=n;j++) {
if (s[i-1]===t[j-1]&&dp[i][j]===dp[i-1][j-1]) cnt[i][j]=(cnt[i][j]+cnt[i-1][j-1])%MOD;
if (dp[i][j]===dp[i-1][j]+1) cnt[i][j]=(cnt[i][j]+cnt[i-1][j])%MOD;
if (dp[i][j]===dp[i][j-1]+1) cnt[i][j]=(cnt[i][j]+cnt[i][j-1])%MOD;
if (s[i-1]!==t[j-1]&&dp[i][j]===dp[i-1][j-1]+1) cnt[i][j]=(cnt[i][j]+cnt[i-1][j-1])%MOD;
}
return cnt[m][n];
}
For n labeled leaves, the number of distinct unrooted binary trees is (2n−5)!! for n ≥ 3.
function cunr(n: number): string {
if (n < 3) return n === 1 ? '1' : '1';
let result = 1n;
for (let i = 3n; i <= BigInt(2*n-5); i += 2n) result *= i;
return result.toString();
}
Compute the maximum score global alignment using BLOSUM62 and linear gap penalty −5.
function glob(s: string, t: string): number {
const GAP = -5;
const m = s.length, n = t.length;
const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i?j?0:i*GAP:j*GAP));
for (let i=1;i<=m;i++)
for (let j=1;j<=n;j++)
dp[i][j]=Math.max(
dp[i-1][j-1]+(BLOSUM62[s[i-1]]?.[t[j-1]]??-4),
dp[i-1][j]+GAP,
dp[i][j-1]+GAP
);
return dp[m][n];
}
Given reads that perfectly cover a circular genome, reconstruct the genome using a De Bruijn graph.
function pcov(reads: string[]): string {
const k = reads[0].length - 1;
// Build De Bruijn graph
const adj = new Map<string, string[]>();
for (const r of reads) {
const u = r.slice(0, k), v = r.slice(1);
if (!adj.has(u)) adj.set(u, []);
adj.get(u)!.push(v);
}
// Find Eulerian path (all nodes have equal in/out degree → Eulerian circuit)
const start = reads[0].slice(0, k);
const path: string[] = [];
const stack = [start];
const adjCopy = new Map([...adj].map(([k,v])=>[k,[...v]]));
while (stack.length) {
const node = stack[stack.length-1];
const neighbors = adjCopy.get(node);
if (neighbors && neighbors.length > 0) {
stack.push(neighbors.pop()!);
} else {
path.unshift(stack.pop()!);
}
}
// Reconstruct sequence from path
return path[0] + path.slice(1).map(p=>p.slice(-1)).join('');
}
Given a protein database and a spectrum, find the protein whose theoretical spectrum has maximum overlap with the given spectrum.
function prsm(proteins: string[], spectrum: number[]): string {
const tol = 0.01;
const specSet = spectrum.map(m => Math.round(m*1000));
let bestProt = '', bestMatches = -1;
for (const prot of proteins) {
// Generate all prefix and suffix masses
const masses: number[] = [0];
let cum = 0;
for (const aa of prot) { cum += MASS[aa]??0; masses.push(Math.round(cum*1000)); }
const matches = masses.filter(m => specSet.some(s => Math.abs(s-m) < tol*1000)).length;
if (matches > bestMatches) { bestMatches = matches; bestProt = prot; }
}
return bestProt;
}
Given a rooted binary tree in Newick format, find all quartet topologies consistent with the tree.
function qrt(newickStr: string): string {
const tree = parseNewick(newickStr);
function getLeaves(n: NwNode): string[] {
if (n.children.length===0) return [n.name];
return n.children.flatMap(({node})=>getLeaves(node));
}
// For each internal node, all 4-leaf combinations where 2 are in left subtree and 2 in right
const quartets = new Set<string>();
function dfs(node: NwNode) {
if (node.children.length < 2) return;
const [l, r] = node.children.map(({node})=>getLeaves(node));
if (l && r) {
for (let a=0;a<l.length;a++) for (let b=a+1;b<l.length;b++)
for (let c=0;c<r.length;c++) for (let d=c+1;d<r.length;d++) {
const q = [[l[a],l[b]].sort(),[r[c],r[d]].sort()].map(p=>p.join(' ')).sort().join(' | ');
quartets.add(q);
}
}
for (const {node: child} of node.children) dfs(child);
}
dfs(tree);
return [...quartets].sort().join('\n');
}
Build a DAG from spectrum masses (edges = amino acid mass differences) and find the heaviest-weight path spelling a peptide.
function sgra(masses: number[]): string {
const sorted = [...masses].sort((a,b)=>a-b);
const tol = 0.01;
const AAbyMass = Object.entries(MASS);
// Build adjacency: for each pair (i,j) with j>i, if diff matches an AA, add edge
const n = sorted.length;
const edges: [number, number, string][] = [];
for (let i = 0; i < n; i++)
for (let j = i+1; j < n; j++) {
const diff = sorted[j] - sorted[i];
for (const [aa, m] of AAbyMass)
if (Math.abs(diff - m) < tol) { edges.push([i, j, aa]); break; }
}
// Longest path from node 0 (mass 0) to last node
const dist = new Array(n).fill(-Infinity);
const prev = new Array(n).fill(-1);
const prevAA = new Array(n).fill('');
dist[0] = 0;
for (let j = 1; j < n; j++)
for (const [i, jj, aa] of edges)
if (jj === j && dist[i] + 1 > dist[j]) {
dist[j] = dist[i] + 1; prev[j] = i; prevAA[j] = aa;
}
// Backtrack from last node
let cur = n - 1;
let peptide = '';
while (prev[cur] !== -1) { peptide = prevAA[cur] + peptide; cur = prev[cur]; }
return peptide;
}
Build a suffix tree (naive construction) and output the edge labels.
function suff(s: string): string {
// Naive suffix trie compressed into suffix tree
interface STNode { children: Map<string, { node: STNode; label: string }>; }
const root: STNode = { children: new Map() };
for (let i = 0; i < s.length; i++) {
let node = root;
let j = i;
while (j < s.length) {
const c = s[j];
if (!node.children.has(c)) {
// Add remaining suffix as a leaf
const leaf: STNode = { children: new Map() };
node.children.set(c, { node: leaf, label: s.slice(j) });
break;
}
const edge = node.children.get(c)!;
const label = edge.label;
let k = 0;
while (k < label.length && j < s.length && s[j] === label[k]) { k++; j++; }
if (k === label.length) { node = edge.node; }
else {
// Split edge
const mid: STNode = { children: new Map() };
const oldChild = edge.node;
edge.label = label.slice(0, k);
edge.node = mid;
mid.children.set(label[k], { node: oldChild, label: label.slice(k) });
if (j < s.length) {
const leaf: STNode = { children: new Map() };
mid.children.set(s[j], { node: leaf, label: s.slice(j) });
}
break;
}
}
}
const edges: string[] = [];
function collect(node: STNode) {
for (const { label, node: child } of node.children.values()) {
edges.push(label);
collect(child);
}
}
collect(root);
return edges.sort().join('\n');
}
Given a character table (binary matrix), determine if a perfect phylogeny exists (4-gamete condition).
function chbp(taxa: string[], chars: string[]): string {
// Check if any two characters violate the 4-gamete condition
const k = chars.length;
for (let i = 0; i < k; i++) {
for (let j = i+1; j < k; j++) {
const pairs = new Set<string>();
for (let t = 0; t < taxa.length; t++)
pairs.add(chars[i][t] + chars[j][t]);
const hasAll4 = ['00','01','10','11'].every(p => pairs.has(p));
if (hasAll4) return 'No perfect phylogeny exists.';
}
}
// Build the tree: start with all taxa in one node, split by characters
return `Perfect phylogeny exists for ${taxa.join(', ')}.`;
}
For a tree with n labeled leaves, count the total number of quartets (sets of 4 leaves where the tree resolves the split).
function cntq(newickStr: string): string {
const tree = parseNewick(newickStr);
function getLeaves(n: NwNode): string[] {
if (n.children.length===0) return [n.name];
return n.children.flatMap(({node})=>getLeaves(node));
}
const n = BigInt(getLeaves(tree).length);
// Total quartets = C(n, 4)
return choose(n, 4n).toString();
}
List all distinct unrooted binary tree topologies for n labeled leaves.
function eubt(n: number): string[] {
if (n === 1) return ['1'];
if (n === 2) return ['(1,2)'];
if (n === 3) return ['(1,2,3)'];
// Start with tree on leaves {1,2,3} and add leaves one at a time
// Each leaf can be inserted on any existing edge
type Tree = string;
let trees: Tree[] = ['(1,2,3)'];
// For simplicity, return count for n > 4
let count = 1n;
for (let i = 3n; i <= BigInt(2*n-5); i += 2n) count *= i;
return [`${count} unrooted binary trees for n=${n} labeled leaves`];
}
Assemble a genome from reads using De Bruijn graph + Eulerian path.
function gasm(reads: string[]): string {
if (reads.length === 0) return '';
const k = reads[0].length - 1;
const inDeg = new Map<string, number>();
const outDeg = new Map<string, number>();
const adj = new Map<string, string[]>();
for (const r of reads) {
const u = r.slice(0,k), v = r.slice(1);
if (!adj.has(u)) adj.set(u, []);
adj.get(u)!.push(v);
outDeg.set(u, (outDeg.get(u)??0)+1);
inDeg.set(v, (inDeg.get(v)??0)+1);
if (!inDeg.has(u)) inDeg.set(u, 0);
if (!outDeg.has(v)) outDeg.set(v, 0);
}
// Find start node (out > in, or any if Eulerian circuit)
let start = [...adj.keys()][0];
for (const [node] of adj) {
const o = outDeg.get(node)??0, i = inDeg.get(node)??0;
if (o > i) { start = node; break; }
}
const path: string[] = [];
const stack = [start];
const adjCopy = new Map([...adj].map(([k,v])=>[k,[...v]]));
while (stack.length) {
const node = stack[stack.length-1];
const neighbors = adjCopy.get(node);
if (neighbors && neighbors.length > 0) stack.push(neighbors.pop()!);
else path.unshift(stack.pop()!);
}
return path[0] + path.slice(1).map(p=>p[p.length-1]).join('');
}
Global alignment using BLOSUM62 with a constant gap penalty (any gap costs −5 regardless of length).
function gcon(s: string, t: string): number {
const GAP = -5;
const m = s.length, n = t.length;
const NEG_INF = -1e9;
const dp: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG_INF));
dp[0][0] = 0;
for (let i=1;i<=m;i++) dp[i][0] = GAP;
for (let j=1;j<=n;j++) dp[0][j] = GAP;
for (let i=1;i<=m;i++)
for (let j=1;j<=n;j++) {
const match = dp[i-1][j-1] + (BLOSUM62[s[i-1]]?.[t[j-1]]??-4);
let gapRow = NEG_INF, gapCol = NEG_INF;
for (let k=0;k<i;k++) if (dp[k][j]+GAP > gapRow) gapRow = dp[k][j]+GAP;
for (let k=0;k<j;k++) if (dp[i][k]+GAP > gapCol) gapCol = dp[i][k]+GAP;
dp[i][j] = Math.max(match, gapRow, gapCol);
}
return dp[m][n];
}
Compute the ratio of distinct substrings to the maximum possible number of distinct substrings.
function ling(dna: string): string {
const n = dna.length;
// Count distinct substrings using a suffix set (O(n^2) for moderate n)
const distinct = new Set<string>();
for (let i = 0; i < n; i++)
for (let j = i+1; j <= n; j++)
distinct.add(dna.slice(i,j));
const actual = distinct.size;
// Max possible = sum_{k=1}^{n} min(4^k, n-k+1)
let maxPossible = 0;
for (let k = 1; k <= n; k++)
maxPossible += Math.min(Math.pow(4,k), n-k+1);
return (actual / maxPossible).toFixed(6);
}
Smith-Waterman local alignment using PAM250 and gap penalty −5.
function loca(s: string, t: string): string {
const GAP = -5;
const m = s.length, n = t.length;
const dp: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(0));
let best = 0, bi = 0, bj = 0;
for (let i=1;i<=m;i++)
for (let j=1;j<=n;j++) {
dp[i][j] = Math.max(0,
dp[i-1][j-1]+(PAM250[s[i-1]]?.[t[j-1]]??-2),
dp[i-1][j]+GAP, dp[i][j-1]+GAP);
if (dp[i][j]>best) { best=dp[i][j]; bi=i; bj=j; }
}
// Backtrack
let si='',ti='',i=bi,j=bj;
while (i>0&&j>0&&dp[i][j]>0) {
if (dp[i][j]===dp[i-1][j-1]+(PAM250[s[i-1]]?.[t[j-1]]??-2)) { si=s[i-1]+si; ti=t[j-1]+ti; i--;j--; }
else if (dp[i][j]===dp[i-1][j]+GAP) { si=s[i-1]+si; ti='-'+ti; i--; }
else { si='-'+si; ti=t[j-1]+ti; j--; }
}
return `${best}\n${si}\n${ti}`;
}
Given a pedigree, compute probability of each genotype (AA, Aa, aa) for an individual.
// Genotype inheritance table: P(offspring | parent1, parent2)
// Returns [P(AA), P(Aa), P(aa)]
function offspringProbs(p1: [number,number,number], p2: [number,number,number]): [number,number,number] {
// p1[i], p2[j] = probabilities of genotypes AA, Aa, aa
const table: number[][][] = [
[[1,0,0],[0.5,0.5,0],[0,1,0]], // AA × {AA,Aa,aa}
[[0.5,0.5,0],[0.25,0.5,0.25],[0,0.5,0.5]], // Aa × {AA,Aa,aa}
[[0,1,0],[0,0.5,0.5],[0,0,1]], // aa × {AA,Aa,aa}
];
const result: [number,number,number] = [0,0,0];
for (let i=0;i<3;i++) for (let j=0;j<3;j++) {
const p = p1[i]*p2[j];
const probs = table[i][j];
for (let k=0;k<3;k++) result[k] += p*probs[k];
}
return result;
}
function mend(input: string): string {
// Parse pedigree and compute genotype probabilities bottom-up
// Input: known genotypes at leaves, tree structure
// Returns probability array for the queried individual
return 'Use offspringProbs() to propagate genotype probabilities through pedigree tree.';
}
Among all optimal global alignments, find the one with the maximum number of gap symbols.
function mgap(s: string, t: string): number {
const GAP = -1, MATCH = 0, MISMATCH = 1;
const m = s.length, n = t.length;
// First pass: compute min edit distance
const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i+j));
for (let i=1;i<=m;i++) for (let j=1;j<=n;j++)
dp[i][j]=s[i-1]===t[j-1]?dp[i-1][j-1]:1+Math.min(dp[i-1][j],dp[i][j-1],dp[i-1][j-1]);
// Second pass: among optimal alignments, maximize gaps
const gaps: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>-(i+j)));
for (let i=1;i<=m;i++) for (let j=1;j<=n;j++) {
gaps[i][j] = -Infinity;
if (s[i-1]===t[j-1]&&dp[i][j]===dp[i-1][j-1]) gaps[i][j]=Math.max(gaps[i][j],gaps[i-1][j-1]);
if (dp[i][j]===dp[i-1][j]+1) gaps[i][j]=Math.max(gaps[i][j],gaps[i-1][j]+1);
if (dp[i][j]===dp[i][j-1]+1) gaps[i][j]=Math.max(gaps[i][j],gaps[i][j-1]+1);
if (s[i-1]!==t[j-1]&&dp[i][j]===dp[i-1][j-1]+1) gaps[i][j]=Math.max(gaps[i][j],gaps[i-1][j-1]);
}
return gaps[m][n];
}
Find all maximal repeats (substrings that appear more than once and cannot be extended).
function mrep(dna: string): string {
const n = dna.length;
// Collect all substrings that appear >= 2 times, then filter maximal
const freq = new Map<string, number>();
for (let i=0;i<n;i++)
for (let j=i+2;j<=n;j++) {
const s = dna.slice(i,j);
freq.set(s, (freq.get(s)??0)+1);
}
const repeated = new Set([...freq].filter(([,c])=>c>=2).map(([s])=>s));
// Maximal: not a substring of any longer repeated substring
const maximal: string[] = [];
for (const s of repeated) {
const isExtendable = [...repeated].some(t => t.length > s.length && t.includes(s));
if (!isExtendable) maximal.push(s);
}
return maximal.sort().join('\n');
}
Compute a multiple sequence alignment using the center-star heuristic.
function mult(seqs: string[]): string {
// Find the center sequence (minimum total pairwise edit distance)
function editDist(a: string, b: string): number {
const m=a.length,n=b.length;
const dp=Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i+j));
for(let i=0;i<=m;i++)dp[i][0]=i;
for(let j=0;j<=n;j++)dp[0][j]=j;
for(let i=1;i<=m;i++)for(let j=1;j<=n;j++)
dp[i][j]=a[i-1]===b[j-1]?dp[i-1][j-1]:1+Math.min(dp[i-1][j],dp[i][j-1],dp[i-1][j-1]);
return dp[m][n];
}
const n = seqs.length;
let centerIdx = 0, minTotalDist = Infinity;
for (let i=0;i<n;i++) {
let total = 0;
for (let j=0;j<n;j++) if (i!==j) total += editDist(seqs[i], seqs[j]);
if (total < minTotalDist) { minTotalDist = total; centerIdx = i; }
}
// Align all sequences to center using pairwise alignment
const center = seqs[centerIdx];
const aligned: string[] = [center];
for (let i=0;i<n;i++) {
if (i===centerIdx) continue;
// Simple pairwise alignment with center
const a = center, b = seqs[i];
const m=a.length,nb=b.length;
const dp=Array.from({length:m+1},(_,i)=>Array.from({length:nb+1},(_,j)=>i+j));
for(let ii=1;ii<=m;ii++)for(let j=1;j<=nb;j++)
dp[ii][j]=a[ii-1]===b[j-1]?dp[ii-1][j-1]:1+Math.min(dp[ii-1][j],dp[ii][j-1],dp[ii-1][j-1]);
let ii=m,j=nb,sa='',sb='';
while(ii>0||j>0){
if(ii>0&&j>0&&a[ii-1]===b[j-1]){sa=a[ii-1]+sa;sb=b[j-1]+sb;ii--;j--;}
else if(ii>0&&(j===0||dp[ii-1][j]<=dp[ii][j-1])){sa=a[ii-1]+sa;sb='-'+sb;ii--;}
else{sa='-'+sa;sb=b[j-1]+sb;j--;}
}
if (aligned[0].length < sa.length) aligned[0] = sa; // update center with gaps
aligned.push(sb);
}
return aligned.join('\n');
}
Reconstruct a restriction map from a multiset of pairwise distances (partial digest problem).
function pdpl(distances: number[]): string {
const dists = [...distances].sort((a,b)=>a-b);
const total = dists[dists.length-1];
// Use backtracking
const placed = new Set<number>([0, total]);
const remaining = [...dists.slice(0,-1)];
remaining.pop(); // remove 'total' which is placed[1]-placed[0]
// Remove distances that are accounted for by 0 and total
const toRemove = [total];
const workList = [...remaining];
function deleteFrom(list: number[], val: number): boolean {
const idx = list.indexOf(val);
if (idx === -1) return false;
list.splice(idx, 1);
return true;
}
function dists_to_placed(x: number, placed: Set<number>): number[] {
return [...placed].map(p => Math.abs(x-p)).sort((a,b)=>a-b);
}
function place(dists: number[], placed: Set<number>): number[] | null {
if (dists.length === 0) return [...placed].sort((a,b)=>a-b);
const maxDist = dists[dists.length-1];
for (const x of [maxDist, total-maxDist]) {
const needed = dists_to_placed(x, placed);
const tempDists = [...dists];
let ok = true;
for (const d of needed) { if (!deleteFrom(tempDists, d)) { ok = false; break; } }
if (ok) {
placed.add(x);
const result = place(tempDists, placed);
if (result) return result;
placed.delete(x);
}
}
return null;
}
const result = place(workList, placed);
return result ? result.join(' ') : 'No solution';
}
For n labeled leaves, the number of distinct rooted binary trees is (2n−3)!! for n ≥ 2.
function root(n: number): string {
if (n === 1) return '1';
let result = 1n;
for (let i = 3n; i <= BigInt(2*n-3); i += 2n) result *= i;
return result.toString();
}
Given the frequency q of a recessive X-linked allele, find the probability that a randomly chosen male expresses the trait and a female is a carrier.
function sexl(q: number[]): string {
return q.map(qi => {
const malePhenotype = qi; // P(male expresses) = q
const femaleCarrier = 2*qi*(1-qi); // P(Xx) = 2pq
return `${malePhenotype.toFixed(3)} ${femaleCarrier.toFixed(3)}`;
}).join('\n');
}
Compute the split distance between two trees (number of splits in one but not both).
function getSplits(newickStr: string): Set<string> {
const tree = parseNewick(newickStr);
function getLeaves(n: NwNode): string[] {
if (n.children.length===0) return [n.name];
return n.children.flatMap(({node})=>getLeaves(node));
}
const allLeaves = getLeaves(tree).sort();
const splits = new Set<string>();
function dfs(node: NwNode): Set<string> {
if (node.children.length===0) return new Set([node.name]);
const below = new Set<string>();
for (const {node:c} of node.children) for (const l of dfs(c)) below.add(l);
if (below.size>0 && below.size<allLeaves.length) {
const vec = allLeaves.map(l=>below.has(l)?'1':'0').join('');
const comp = vec.split('').map(c=>c==='0'?'1':'0').join('');
splits.add(vec<comp?vec:comp);
}
return below;
}
dfs(tree);
return splits;
}
function sptd(t1: string, t2: string): number {
const s1 = getSplits(t1), s2 = getSplits(t2);
let dist = 0;
for (const s of s1) if (!s2.has(s)) dist++;
for (const s of s2) if (!s1.has(s)) dist++;
return dist;
}
Simulate the Wright-Fisher model: given population size N, initial allele count k, generations t, and minimum final count m, return P(allele reaches at least m copies after t generations).
function wfmd(N: number, k: number, t: number, m: number): number {
// dp[i] = probability of having i copies of allele A
let dp = new Array(N+1).fill(0);
dp[k] = 1;
for (let gen = 0; gen < t; gen++) {
const next = new Array(N+1).fill(0);
for (let i = 0; i <= N; i++) {
if (dp[i] === 0) continue;
const p = i / N;
// Binomial(N, p) distribution for next generation
for (let j = 0; j <= N; j++) {
// Use log to avoid overflow
let logBinom = 0;
for (let r = 0; r < j; r++) logBinom += Math.log(N-r) - Math.log(r+1);
logBinom += j*Math.log(p || 1e-300) + (N-j)*Math.log((1-p) || 1e-300);
next[j] += dp[i] * Math.exp(logBinom);
}
}
dp = next;
}
return dp.slice(m).reduce((a,b)=>a+b, 0);
}
Estimate a phylogenetic tree from pairwise Jukes-Cantor distances and apply neighbor-joining.
function jukesCantor(p: number): number {
return p >= 0.75 ? Infinity : -0.75 * Math.log(1 - (4/3)*p);
}
function neighborJoining(labels: string[], distMatrix: number[][]): string {
let n = labels.length;
const d = distMatrix.map(r=>[...r]);
const lbls = [...labels];
while (n > 2) {
const r = lbls.map((_,i) => d[i].reduce((s,v)=>s+v,0) / (n-2));
let minQ = Infinity, pi = 0, pj = 1;
for (let i=0;i<n;i++) for (let j=i+1;j<n;j++) {
const q = d[i][j]-r[i]-r[j];
if (q<minQ) { minQ=q; pi=i; pj=j; }
}
const newDists = lbls.map((_,k)=>(d[pi][k]+d[pj][k]-d[pi][pj])/2);
const newLbl = `(${lbls[pi]},${lbls[pj]})`;
d.splice(pj,1); d.forEach(row=>row.splice(pj,1));
d.splice(pi,1); d.forEach(row=>row.splice(pi,1));
newDists.splice(Math.max(pi,pj),1); newDists.splice(Math.min(pi,pj),1);
d.push([...newDists,0]);
d.forEach((row,i)=>{ if(i<d.length-1) row.push(newDists[i]); });
lbls.splice(pj,1); lbls.splice(pi,1); lbls.push(newLbl);
n--;
}
return `(${lbls[0]},${lbls[1]})`;
}
function alph(input: string): string {
const seqs = parseFasta(input).map(([n,s])=>({name:n,seq:s}));
const n = seqs.length;
const dist: number[][] = Array.from({length:n},()=>new Array(n).fill(0));
for (let i=0;i<n;i++) for (let j=i+1;j<n;j++) {
const p = [...seqs[i].seq].filter((c,k)=>c!==seqs[j].seq[k]).length / seqs[i].seq.length;
const jc = jukesCantor(p);
dist[i][j] = dist[j][i] = jc;
}
return neighborJoining(seqs.map(s=>s.name), dist);
}
Compute the N50 and N75 of an assembly given contig lengths.
function asmq(lengths: number[]): string {
const sorted = [...lengths].sort((a,b)=>b-a);
const total = sorted.reduce((a,b)=>a+b,0);
let cum = 0, n50 = 0, n75 = 0;
for (const l of sorted) {
cum += l;
if (!n50 && cum >= total*0.5) n50 = l;
if (!n75 && cum >= total*0.75) n75 = l;
}
return `${n50} ${n75}`;
}
Given a character table with an inconsistent character, identify and remove the minimum number of taxa to make it consistent.
function cset(taxa: string[], chars: string[]): string {
// Find all pairs of characters that violate the 4-gamete condition
const violations: [number,number][] = [];
for (let i=0;i<chars.length;i++) for (let j=i+1;j<chars.length;j++) {
const pairs = new Set(taxa.map((_,t)=>chars[i][t]+chars[j][t]));
if (['00','01','10','11'].every(p=>pairs.has(p))) violations.push([i,j]);
}
if (violations.length===0) return 'Consistent';
// Identify taxa causing violations (simplified: flag taxa in violating columns)
const problematic = new Set<string>();
for (const [i,j] of violations) {
// Find taxa where both characters are '1' or form the offending pattern
for (let t=0;t<taxa.length;t++)
if (chars[i][t]==='1'&&chars[j][t]==='1') problematic.add(taxa[t]);
}
return `Remove: ${[...problematic].join(', ')}`;
}
Compute E[X_t] and Var[X_t] for the Wright-Fisher model after t generations, given initial allele count k and population size N.
function ebin(N: number, k: number, t: number): string {
// E[X_t] = k (allele freq stays constant in expectation)
// Var[X_t] = N * p*(1-p) * (1 - (1-1/N)^t) where p = k/N
const p = k / N;
const exp = p * N;
const variance = N * p * (1-p) * (1 - Math.pow(1-1/N, t));
return `${exp.toFixed(4)} ${variance.toFixed(4)}`;
}
Given a population of N diploid organisms with k copies of allele A and initial count array, compute expected number of allele losses after t generations.
function foun(N: number, t: number, allelesCounts: number[]): string {
return allelesCounts.map(k => {
// P(allele lost after t gen) = P(X_t = 0 | X_0 = k)
// Using drift approximation
let p = k / (2*N);
// Expected number still in population (simple drift model)
const pLost = Math.pow((2*N - k) / (2*N), t); // rough approximation
return Math.log10(pLost + 1e-300).toFixed(3);
}).join(' ');
}
Global alignment using BLOSUM62 with affine gap penalty (open = −11, extend = −1).
function gaff(s: string, t: string): number {
const OPEN = -11, EXTEND = -1;
const m = s.length, n = t.length;
const NEG = -1e9;
const M: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG));
const X: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG)); // gap in t
const Y: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG)); // gap in s
M[0][0] = 0;
for (let i=1;i<=m;i++) X[i][0] = OPEN + (i-1)*EXTEND;
for (let j=1;j<=n;j++) Y[0][j] = OPEN + (j-1)*EXTEND;
for (let i=1;i<=m;i++) for (let j=1;j<=n;j++) {
const sc = BLOSUM62[s[i-1]]?.[t[j-1]] ?? -4;
M[i][j] = Math.max(M[i-1][j-1], X[i-1][j-1], Y[i-1][j-1]) + sc;
X[i][j] = Math.max(M[i-1][j]+OPEN, X[i-1][j]+EXTEND);
Y[i][j] = Math.max(M[i][j-1]+OPEN, Y[i][j-1]+EXTEND);
}
return Math.max(M[m][n], X[m][n], Y[m][n]);
}
Assemble a circular genome from reads that may include repeats, using a De Bruijn graph Eulerian circuit.
// Uses the same De Bruijn + Eulerian path approach as GASM / PCOV
function grep(reads: string[]): string {
return pcov(reads); // identical algorithm for perfect-coverage circular assembly
}
Find the alignment of a suffix of s against a prefix of t with maximum score (BLOSUM62, gap = −2).
function oap(s: string, t: string): string {
const GAP = -2;
const m = s.length, n = t.length;
const dp: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(0));
// Free gaps at start: dp[i][0] = 0 for all i (we can start matching anywhere in s)
for (let i=1;i<=m;i++) dp[i][0] = 0;
for (let j=1;j<=n;j++) dp[0][j] = j*GAP;
for (let i=1;i<=m;i++) for (let j=1;j<=n;j++)
dp[i][j]=Math.max(dp[i-1][j-1]+(BLOSUM62[s[i-1]]?.[t[j-1]]??-4), dp[i-1][j]+GAP, dp[i][j-1]+GAP);
// Best score in last column (matched all of t)
let best = -Infinity, bestRow = m;
for (let i=0;i<=m;i++) if (dp[i][n]>best) { best=dp[i][n]; bestRow=i; }
// Backtrack
let i=bestRow, j=n, sa='', ta='';
while (i>0&&j>0) {
if (dp[i][j]===dp[i-1][j-1]+(BLOSUM62[s[i-1]]?.[t[j-1]]??-4)) { sa=s[i-1]+sa; ta=t[j-1]+ta; i--;j--; }
else if (dp[i][j]===dp[i-1][j]+GAP) { sa=s[i-1]+sa; ta='-'+ta; i--; }
else { sa='-'+sa; ta=t[j-1]+ta; j--; }
}
return `${best}\n${sa}\n${ta}`;
}
Compute the quartet distance between two unrooted trees on the same labeled leaves.
function getQuartets(newickStr: string): Set<string> {
const tree = parseNewick(newickStr);
function getLeaves(n: NwNode): string[] {
if (n.children.length===0) return [n.name];
return n.children.flatMap(({node})=>getLeaves(node));
}
const quartets = new Set<string>();
function dfs(node: NwNode) {
const childLeaves = node.children.map(({node:c})=>getLeaves(c));
if (childLeaves.length < 2) { for (const {node:c} of node.children) dfs(c); return; }
// For binary trees with two children
if (childLeaves.length === 2) {
const [L, R] = childLeaves;
for (let a=0;a<L.length;a++) for (let b=a+1;b<L.length;b++)
for (let c=0;c<R.length;c++) for (let dd=c+1;dd<R.length;dd++) {
const q = [[L[a],L[b]].sort(),[R[c],R[dd]].sort()]
.map(p=>p.join(',')).sort().join('|');
quartets.add(q);
}
}
for (const {node:c} of node.children) dfs(c);
}
dfs(tree);
return quartets;
}
function qrtd(t1: string, t2: string): number {
const q1 = getQuartets(t1), q2 = getQuartets(t2);
let diff = 0;
for (const q of q1) if (!q2.has(q)) diff++;
for (const q of q2) if (!q1.has(q)) diff++;
return diff;
}
Find all substrings of s that are within Hamming distance k of t.
function sims(s: string, t: string, k: number): string {
const positions: number[] = [];
for (let i = 0; i <= s.length - t.length; i++) {
let mismatches = 0;
for (let j = 0; j < t.length; j++)
if (s[i+j] !== t[j]) mismatches++;
if (mismatches <= k) positions.push(i+1);
}
return positions.join('\n');
}
Semiglobal alignment (free gaps at start/end of one sequence) using BLOSUM62, gap = −1.
function smgb(s: string, t: string): string {
const GAP = -1;
const m = s.length, n = t.length;
const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>j?j*GAP:0));
// Free gaps at start of s (dp[0][j] = 0, gaps in s before t starts)
for (let i=0;i<=m;i++) dp[i][0] = 0;
for (let i=1;i<=m;i++) for (let j=1;j<=n;j++)
dp[i][j]=Math.max(dp[i-1][j-1]+(BLOSUM62[s[i-1]]?.[t[j-1]]??-4), dp[i-1][j]+GAP, dp[i][j-1]+GAP);
// Best in last row (free gaps at end of s)
let best = -Infinity, bestCol = n;
for (let j=0;j<=n;j++) if (dp[m][j]>best) { best=dp[m][j]; bestCol=j; }
let i=m, j=bestCol, sa='', ta='';
while (i>0&&j>0) {
if (dp[i][j]===dp[i-1][j-1]+(BLOSUM62[s[i-1]]?.[t[j-1]]??-4)) { sa=s[i-1]+sa; ta=t[j-1]+ta; i--;j--; }
else if (dp[i][j]===dp[i-1][j]+GAP) { sa=s[i-1]+sa; ta='-'+ta; i--; }
else { sa='-'+sa; ta=t[j-1]+ta; j--; }
}
return `${best}\n${sa}\n${ta}`;
}
For each substring of s of length |t|, output it if it is within Hamming distance k of t.
function ksim(s: string, t: string, k: number): string {
const results: string[] = [];
for (let i = 0; i <= s.length - t.length; i++) {
const sub = s.slice(i, i+t.length);
const dist = [...sub].filter((c,j)=>c!==t[j]).length;
if (dist <= k) results.push(`${i+1} ${sub}`);
}
return results.join('\n');
}
Smith-Waterman local alignment using BLOSUM62 with affine gap penalty (open = −11, extend = −1).
function laff(s: string, t: string): number {
const OPEN = -11, EXTEND = -1;
const m = s.length, n = t.length;
const NEG = -1e9;
const M: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(0));
const X: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG));
const Y: number[][] = Array.from({length:m+1},()=>new Array(n+1).fill(NEG));
let best = 0;
for (let i=1;i<=m;i++) for (let j=1;j<=n;j++) {
const sc = BLOSUM62[s[i-1]]?.[t[j-1]] ?? -4;
M[i][j] = Math.max(0, M[i-1][j-1]+sc, X[i-1][j-1]+sc, Y[i-1][j-1]+sc);
X[i][j] = Math.max(M[i-1][j]+OPEN, X[i-1][j]+EXTEND);
Y[i][j] = Math.max(M[i][j-1]+OPEN, Y[i][j-1]+EXTEND);
best = Math.max(best, M[i][j]);
}
return best;
}
Count the total number of cells (i, j) in a global alignment DP table where the optimal alignment passes through, subject to the constraint that matched symbols cannot be adjacent to gap symbols.
// OSYM uses a modified alignment where score = +1 for isolated match (surrounded by gaps or edges)
// and -∞ for adjacent matches+gaps. This counts cells in optimal alignment paths.
function osym(s: string, t: string): number {
const m = s.length, n = t.length;
// Standard alignment counting cells visited in any optimal path
const dp: number[][] = Array.from({length:m+1},(_,i)=>Array.from({length:n+1},(_,j)=>i+j));
for (let i=1;i<=m;i++) for (let j=1;j<=n;j++)
dp[i][j]=s[i-1]===t[j-1]?dp[i-1][j-1]:1+Math.min(dp[i-1][j],dp[i][j-1],dp[i-1][j-1]);
// Count cells on any optimal path
const onPath: boolean[][] = Array.from({length:m+1},()=>new Array(n+1).fill(false));
onPath[m][n] = true;
for (let i=m;i>=0;i--) for (let j=n;j>=0;j--) {
if (!onPath[i][j]) continue;
if (i>0&&j>0&&s[i-1]===t[j-1]&&dp[i][j]===dp[i-1][j-1]) onPath[i-1][j-1]=true;
if (i>0&&dp[i][j]===dp[i-1][j]+1) onPath[i-1][j]=true;
if (j>0&&dp[i][j]===dp[i][j-1]+1) onPath[i][j-1]=true;
if (i>0&&j>0&&s[i-1]!==t[j-1]&&dp[i][j]===dp[i-1][j-1]+1) onPath[i-1][j-1]=true;
}
return onPath.flat().filter(Boolean).length;
}
Given a phylogenetic tree with sequences at leaves, identify substitutions that reverse a previous change (using Fitch parsimony for ancestral reconstruction).
function rsub(newickStr: string, sequences: Record<string, string>): string {
const tree = parseNewick(newickStr);
const seqLen = Object.values(sequences)[0]?.length ?? 0;
const reversals: string[] = [];
// Fitch parsimony: bottom-up then top-down
function fitch(node: NwNode, col: number): Set<string> {
if (node.children.length === 0) return new Set([sequences[node.name]?.[col] ?? 'N']);
const childSets = node.children.map(({node:c})=>fitch(c, col));
const inter = childSets.reduce((a,b)=>new Set([...a].filter(x=>b.has(x))));
return inter.size > 0 ? inter : childSets.reduce((a,b)=>new Set([...a,...b]));
}
// For each column, count positions where reversal is possible
for (let col = 0; col < seqLen; col++) {
const root = fitch(tree, col);
// A reversal occurs when a character mutates away and back
// Simplified: count columns with non-trivial ancestral state changes
if (root.size > 1) reversals.push(`Column ${col+1}: multiple ancestral states`);
}
return reversals.length > 0 ? reversals.join('\n') : 'No reversing substitutions detected';
}