Suffix tree: Difference between revisions
Alextretyak (talk | contribs) m →{{header|11l}}: Void |
|||
(27 intermediate revisions by 12 users not shown) | |||
Line 10: | Line 10: | ||
* No two edges starting out of a node can have string labels beginning with the same character. |
* No two edges starting out of a node can have string labels beginning with the same character. |
||
* The string obtained by concatenating all the string labels found on the path from the root to leaf i spells out suffix S[i..n], for i from 1 to n. |
* The string obtained by concatenating all the string labels found on the path from the root to leaf i spells out suffix S[i..n], for i from 1 to n. |
||
Such a tree does not exist for all strings. To ensure existence, a character that is not found in S must be appended at its end. The character '$' is traditionally used for this purpose. |
Such a tree does not exist for all strings. To ensure existence, a character that is not found in S must be appended at its end. The character '$' is traditionally used for this purpose. |
||
Line 18: | Line 19: | ||
The computation time for an efficient algorithm should be <math>O(n)</math>, but such an algorithm might be difficult to implement. An easier, <math>O(n^2)</math> algorithm is accepted. |
The computation time for an efficient algorithm should be <math>O(n)</math>, but such an algorithm might be difficult to implement. An easier, <math>O(n^2)</math> algorithm is accepted. |
||
=={{header|11l}}== |
|||
{{trans|Python}} |
|||
<syntaxhighlight lang="11l">T Node |
|||
String sub |
|||
[Int] ch |
|||
F (sub, children) |
|||
.sub = sub |
|||
.ch = children |
|||
T SuffixTree |
|||
nodes = [Node(‘’, [Int]())] |
|||
F (str) |
|||
L(i) 0 .< str.len |
|||
.addSuffix(str[i..]) |
|||
F addSuffix(suf) |
|||
V n = 0 |
|||
V i = 0 |
|||
L i < suf.len |
|||
V b = suf[i] |
|||
V x2 = 0 |
|||
Int n2 |
|||
L |
|||
V children = .nodes[n].ch |
|||
I x2 == children.len |
|||
n2 = .nodes.len |
|||
.nodes.append(Node(suf[i..], [Int]())) |
|||
.nodes[n].ch.append(n2) |
|||
R |
|||
n2 = children[x2] |
|||
I .nodes[n2].sub[0] == b |
|||
L.break |
|||
x2 = x2 + 1 |
|||
V sub2 = .nodes[n2].sub |
|||
V j = 0 |
|||
L j < sub2.len |
|||
I suf[i + j] != sub2[j] |
|||
V n3 = n2 |
|||
n2 = .nodes.len |
|||
.nodes.append(Node(sub2[0 .< j], [n3])) |
|||
.nodes[n3].sub = sub2[j..] |
|||
.nodes[n].ch[x2] = n2 |
|||
L.break |
|||
j = j + 1 |
|||
i = i + j |
|||
n = n2 |
|||
F visualize() |
|||
I .nodes.empty |
|||
print(‘<empty>’) |
|||
R |
|||
F f(Int n, String pre) -> Void |
|||
V children = @.nodes[n].ch |
|||
I children.empty |
|||
print(‘-- ’(@.nodes[n].sub)) |
|||
R |
|||
print(‘+- ’(@.nodes[n].sub)) |
|||
L(c) children[0 .< (len)-1] |
|||
print(pre‘ +-’, end' ‘ ’) |
|||
@f(c, pre‘ | ’) |
|||
print(pre‘ +-’, end' ‘ ’) |
|||
@f(children.last, pre‘ ’) |
|||
f(0, ‘’) |
|||
SuffixTree(‘banana$’).visualize()</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
+- |
|||
+- -- banana$ |
|||
+- +- a |
|||
| +- +- na |
|||
| | +- -- na$ |
|||
| | +- -- $ |
|||
| +- -- $ |
|||
+- +- na |
|||
| +- -- na$ |
|||
| +- -- $ |
|||
+- -- $ |
|||
</pre> |
|||
=={{header|C sharp|C#}}== |
|||
{{trans|C++}} |
|||
<syntaxhighlight lang="csharp">using System; |
|||
using System.Collections.Generic; |
|||
namespace SuffixTree { |
|||
class Node { |
|||
public string sub; // a substring of the input string |
|||
public List<int> ch = new List<int>(); // vector of child nodes |
|||
public Node() { |
|||
sub = ""; |
|||
} |
|||
public Node(string sub, params int[] children) { |
|||
this.sub = sub; |
|||
ch.AddRange(children); |
|||
} |
|||
} |
|||
class SuffixTree { |
|||
readonly List<Node> nodes = new List<Node>(); |
|||
public SuffixTree(string str) { |
|||
nodes.Add(new Node()); |
|||
for (int i = 0; i < str.Length; i++) { |
|||
AddSuffix(str.Substring(i)); |
|||
} |
|||
} |
|||
public void Visualize() { |
|||
if (nodes.Count == 0) { |
|||
Console.WriteLine("<empty>"); |
|||
return; |
|||
} |
|||
void f(int n, string pre) { |
|||
var children = nodes[n].ch; |
|||
if (children.Count == 0) { |
|||
Console.WriteLine("- {0}", nodes[n].sub); |
|||
return; |
|||
} |
|||
Console.WriteLine("+ {0}", nodes[n].sub); |
|||
var it = children.GetEnumerator(); |
|||
if (it.MoveNext()) { |
|||
do { |
|||
var cit = it; |
|||
if (!cit.MoveNext()) break; |
|||
Console.Write("{0}+-", pre); |
|||
f(it.Current, pre + "| "); |
|||
} while (it.MoveNext()); |
|||
} |
|||
Console.Write("{0}+-", pre); |
|||
f(children[children.Count-1], pre+" "); |
|||
} |
|||
f(0, ""); |
|||
} |
|||
private void AddSuffix(string suf) { |
|||
int n = 0; |
|||
int i = 0; |
|||
while (i < suf.Length) { |
|||
char b = suf[i]; |
|||
int x2 = 0; |
|||
int n2; |
|||
while (true) { |
|||
var children = nodes[n].ch; |
|||
if (x2 == children.Count) { |
|||
// no matching child, remainder of suf becomes new node |
|||
n2 = nodes.Count; |
|||
nodes.Add(new Node(suf.Substring(i))); |
|||
nodes[n].ch.Add(n2); |
|||
return; |
|||
} |
|||
n2 = children[x2]; |
|||
if (nodes[n2].sub[0] == b) { |
|||
break; |
|||
} |
|||
x2++; |
|||
} |
|||
// find prefix of remaining suffix in common with child |
|||
var sub2 = nodes[n2].sub; |
|||
int j = 0; |
|||
while (j < sub2.Length) { |
|||
if (suf[i + j] != sub2[j]) { |
|||
// split n2 |
|||
var n3 = n2; |
|||
// new node for the part in common |
|||
n2 = nodes.Count; |
|||
nodes.Add(new Node(sub2.Substring(0, j), n3)); |
|||
nodes[n3].sub = sub2.Substring(j); // old node loses the part in common |
|||
nodes[n].ch[x2] = n2; |
|||
break; // continue down the tree |
|||
} |
|||
j++; |
|||
} |
|||
i += j; // advance past part in common |
|||
n = n2; // continue down the tree |
|||
} |
|||
} |
|||
} |
|||
class Program { |
|||
static void Main() { |
|||
new SuffixTree("banana$").Visualize(); |
|||
} |
|||
} |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre>+ |
|||
+-- banana$ |
|||
+-+ a |
|||
| +-+ na |
|||
| | +-- na$ |
|||
| | +-- $ |
|||
| +-- $ |
|||
+-+ na |
|||
| +-- na$ |
|||
| +-- $ |
|||
+-- $</pre> |
|||
=={{header|C++}}== |
|||
{{trans|D}} |
|||
<syntaxhighlight lang="cpp">#include <functional> |
|||
#include <iostream> |
|||
#include <vector> |
|||
struct Node { |
|||
std::string sub = ""; // a substring of the input string |
|||
std::vector<int> ch; // vector of child nodes |
|||
Node() { |
|||
// empty |
|||
} |
|||
Node(const std::string& sub, std::initializer_list<int> children) : sub(sub) { |
|||
ch.insert(ch.end(), children); |
|||
} |
|||
}; |
|||
struct SuffixTree { |
|||
std::vector<Node> nodes; |
|||
SuffixTree(const std::string& str) { |
|||
nodes.push_back(Node{}); |
|||
for (size_t i = 0; i < str.length(); i++) { |
|||
addSuffix(str.substr(i)); |
|||
} |
|||
} |
|||
void visualize() { |
|||
if (nodes.size() == 0) { |
|||
std::cout << "<empty>\n"; |
|||
return; |
|||
} |
|||
std::function<void(int, const std::string&)> f; |
|||
f = [&](int n, const std::string & pre) { |
|||
auto children = nodes[n].ch; |
|||
if (children.size() == 0) { |
|||
std::cout << "- " << nodes[n].sub << '\n'; |
|||
return; |
|||
} |
|||
std::cout << "+ " << nodes[n].sub << '\n'; |
|||
auto it = std::begin(children); |
|||
if (it != std::end(children)) do { |
|||
if (std::next(it) == std::end(children)) break; |
|||
std::cout << pre << "+-"; |
|||
f(*it, pre + "| "); |
|||
it = std::next(it); |
|||
} while (true); |
|||
std::cout << pre << "+-"; |
|||
f(children[children.size() - 1], pre + " "); |
|||
}; |
|||
f(0, ""); |
|||
} |
|||
private: |
|||
void addSuffix(const std::string & suf) { |
|||
int n = 0; |
|||
size_t i = 0; |
|||
while (i < suf.length()) { |
|||
char b = suf[i]; |
|||
int x2 = 0; |
|||
int n2; |
|||
while (true) { |
|||
auto children = nodes[n].ch; |
|||
if (x2 == children.size()) { |
|||
// no matching child, remainder of suf becomes new node |
|||
n2 = nodes.size(); |
|||
nodes.push_back(Node(suf.substr(i), {})); |
|||
nodes[n].ch.push_back(n2); |
|||
return; |
|||
} |
|||
n2 = children[x2]; |
|||
if (nodes[n2].sub[0] == b) { |
|||
break; |
|||
} |
|||
x2++; |
|||
} |
|||
// find prefix of remaining suffix in common with child |
|||
auto sub2 = nodes[n2].sub; |
|||
size_t j = 0; |
|||
while (j < sub2.size()) { |
|||
if (suf[i + j] != sub2[j]) { |
|||
// split n2 |
|||
auto n3 = n2; |
|||
// new node for the part in common |
|||
n2 = nodes.size(); |
|||
nodes.push_back(Node(sub2.substr(0, j), { n3 })); |
|||
nodes[n3].sub = sub2.substr(j); // old node loses the part in common |
|||
nodes[n].ch[x2] = n2; |
|||
break; // continue down the tree |
|||
} |
|||
j++; |
|||
} |
|||
i += j; // advance past part in common |
|||
n = n2; // continue down the tree |
|||
} |
|||
} |
|||
}; |
|||
int main() { |
|||
SuffixTree("banana$").visualize(); |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre>+ |
|||
+-- banana$ |
|||
+-+ a |
|||
| +-+ na |
|||
| | +-- na$ |
|||
| | +-- $ |
|||
| +-- $ |
|||
+-+ na |
|||
| +-- na$ |
|||
| +-- $ |
|||
+-- $</pre> |
|||
=={{header|D}}== |
=={{header|D}}== |
||
{{trans|Kotlin}} |
{{trans|Kotlin}} |
||
< |
<syntaxhighlight lang="d">import std.stdio; |
||
struct Node { |
struct Node { |
||
Line 113: | Line 442: | ||
void main() { |
void main() { |
||
SuffixTree("banana$").visualize(); |
SuffixTree("banana$").visualize(); |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>┐ |
<pre>┐ |
||
Line 126: | Line 455: | ||
│ └─╴ $ |
│ └─╴ $ |
||
└─╴ $</pre> |
└─╴ $</pre> |
||
== Elixir == |
|||
<syntaxhighlight lang="elixir">defmodule STree do |
|||
defstruct branch: [] |
|||
defp suffixes([]), do: [] |
|||
defp suffixes(w), do: [w | suffixes tl(w)] |
|||
defp lcp([], _, acc), do: acc |
|||
defp lcp(_, [], acc), do: acc |
|||
defp lcp([c | u], [a | w], acc) do |
|||
if c == a do |
|||
lcp(u, w, acc + 1) |
|||
else acc |
|||
end |
|||
end |
|||
defp g([], aw), do: [{{aw, length aw}, nil}] |
|||
defp g(cusnes, aw) do |
|||
[cusn | es] = cusnes |
|||
{cus, node} = cusn |
|||
{cu, culen} = cus |
|||
cpl = case node do |
|||
nil -> lcp cu, aw, 0 |
|||
_ -> lcp (Enum.take cu, culen), aw, 0 |
|||
end |
|||
x = Enum.drop cu, cpl |
|||
xlen = culen - cpl |
|||
y = Enum.drop aw, cpl |
|||
ex = {{x, xlen}, node} |
|||
ey = {{y, length y}, nil} |
|||
cond do |
|||
hd(aw) > hd(cu) -> [cusn | g(es, aw)] |
|||
hd(aw) < hd(cu) -> [{{aw, length aw}, nil} | cusnes] |
|||
nil != node && xlen == 0 -> [{cus, insert_suffix(y, node)} | es] |
|||
hd(x) < hd(y) -> [{{cu, cpl}, %STree{branch: [ex, ey]}} | es] |
|||
true -> [{{cu, cpl}, %STree{branch: [ey, ex]}} | es] |
|||
end |
|||
end |
|||
defp insert_suffix(aw, node), do: %STree{branch: g(node.branch, aw)} |
|||
def naive_insertion(t), do: List.foldl(suffixes(t), %STree{}, &insert_suffix/2) |
|||
defp f(nil, _, label), do: IO.puts("╴ #{label}") |
|||
defp f(%STree{branch: children}, pre, label) do |
|||
IO.puts "┐ #{label}" |
|||
children |
|||
|> Enum.take(length(children) - 1) |
|||
|> Enum.each(fn c -> |
|||
IO.write(pre <> "├─") |
|||
{ws, len} = elem(c, 0) |
|||
f(elem(c, 1), pre <> "│ ", Enum.join(Enum.take ws, len)) |
|||
end) |
|||
IO.write(pre <> "└─") |
|||
c = List.last(children) |
|||
{ws, len} = elem(c, 0) |
|||
f(elem(c, 1), pre <> " ", Enum.join(Enum.take ws, len)) |
|||
end |
|||
def visualize(n), do: f(n, "", "") |
|||
def main do |
|||
"banana$" |
|||
|> String.graphemes |
|||
|> naive_insertion |
|||
|> visualize |
|||
end |
|||
end</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
┐ |
|||
├─╴ $ |
|||
├─┐ a |
|||
│ ├─╴ $ |
|||
│ └─┐ na |
|||
│ ├─╴ $ |
|||
│ └─╴ na$ |
|||
├─╴ banana$ |
|||
└─┐ na |
|||
├─╴ $ |
|||
└─╴ na$ |
|||
</pre> |
|||
=={{header|Go}}== |
=={{header|Go}}== |
||
Vis function from [[Visualize_a_tree#Unicode]]. |
Vis function from [[Visualize_a_tree#Unicode]]. |
||
< |
<syntaxhighlight lang="go">package main |
||
import "fmt" |
import "fmt" |
||
Line 214: | Line 627: | ||
} |
} |
||
f(0, "") |
f(0, "") |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 234: | Line 647: | ||
Implementation: |
Implementation: |
||
< |
<syntaxhighlight lang="j">classify=: {.@> </. ] |
||
build_tree=:3 :0 |
build_tree=:3 :0 |
||
Line 260: | Line 673: | ||
tree=. B=:|:build_tree <\. y |
tree=. B=:|:build_tree <\. y |
||
((1+#y)-each {.tree),}.tree |
((1+#y)-each {.tree),}.tree |
||
)</ |
)</syntaxhighlight> |
||
Task example: |
Task example: |
||
< |
<syntaxhighlight lang="j"> suffix_tree 'banana$' |
||
┌──┬───────┬─┬──┬───┬─┬─┬──┬───┬─┬─┐ |
┌──┬───────┬─┬──┬───┬─┬─┬──┬───┬─┬─┐ |
||
│__│1 │_│_ │2 │4│6│_ │3 │5│7│ |
│__│1 │_│_ │2 │4│6│_ │3 │5│7│ |
||
Line 271: | Line 684: | ||
├──┼───────┼─┼──┼───┼─┼─┼──┼───┼─┼─┤ |
├──┼───────┼─┼──┼───┼─┼─┼──┼───┼─┼─┤ |
||
│ │banana$│a│na│na$│$│$│na│na$│$│$│ |
│ │banana$│a│na│na$│$│$│na│na$│$│$│ |
||
└──┴───────┴─┴──┴───┴─┴─┴──┴───┴─┴─┘</ |
└──┴───────┴─┴──┴───┴─┴─┴──┴───┴─┴─┘</syntaxhighlight> |
||
The first row is the leaf number (_ for internal nodes). |
The first row is the leaf number (_ for internal nodes). |
||
Line 281: | Line 694: | ||
Visualizing, using [[Visualize_a_tree#J|showtree]] and prefixing the substring leading to each leaf with the leaf number (in brackets): |
Visualizing, using [[Visualize_a_tree#J|showtree]] and prefixing the substring leading to each leaf with the leaf number (in brackets): |
||
< |
<syntaxhighlight lang="j">fmttree=: ;@(1&{) showtree~ {: (,~ }.`('[','] ',~":)@.(_>|))each {. |
||
fmttree suffix_tree 'banana$' |
fmttree suffix_tree 'banana$' |
||
Line 291: | Line 704: | ||
├─ na ────────┴─ [5] $ |
├─ na ────────┴─ [5] $ |
||
└─ [7] $ |
└─ [7] $ |
||
</syntaxhighlight> |
|||
</lang> |
|||
=={{header|Java}}== |
|||
{{trans|Kotlin}} |
|||
<syntaxhighlight lang="java">import java.util.ArrayList; |
|||
import java.util.List; |
|||
public class SuffixTreeProblem { |
|||
private static class Node { |
|||
String sub = ""; // a substring of the input string |
|||
List<Integer> ch = new ArrayList<>(); // list of child nodes |
|||
} |
|||
private static class SuffixTree { |
|||
private List<Node> nodes = new ArrayList<>(); |
|||
public SuffixTree(String str) { |
|||
nodes.add(new Node()); |
|||
for (int i = 0; i < str.length(); ++i) { |
|||
addSuffix(str.substring(i)); |
|||
} |
|||
} |
|||
private void addSuffix(String suf) { |
|||
int n = 0; |
|||
int i = 0; |
|||
while (i < suf.length()) { |
|||
char b = suf.charAt(i); |
|||
List<Integer> children = nodes.get(n).ch; |
|||
int x2 = 0; |
|||
int n2; |
|||
while (true) { |
|||
if (x2 == children.size()) { |
|||
// no matching child, remainder of suf becomes new node. |
|||
n2 = nodes.size(); |
|||
Node temp = new Node(); |
|||
temp.sub = suf.substring(i); |
|||
nodes.add(temp); |
|||
children.add(n2); |
|||
return; |
|||
} |
|||
n2 = children.get(x2); |
|||
if (nodes.get(n2).sub.charAt(0) == b) break; |
|||
x2++; |
|||
} |
|||
// find prefix of remaining suffix in common with child |
|||
String sub2 = nodes.get(n2).sub; |
|||
int j = 0; |
|||
while (j < sub2.length()) { |
|||
if (suf.charAt(i + j) != sub2.charAt(j)) { |
|||
// split n2 |
|||
int n3 = n2; |
|||
// new node for the part in common |
|||
n2 = nodes.size(); |
|||
Node temp = new Node(); |
|||
temp.sub = sub2.substring(0, j); |
|||
temp.ch.add(n3); |
|||
nodes.add(temp); |
|||
nodes.get(n3).sub = sub2.substring(j); // old node loses the part in common |
|||
nodes.get(n).ch.set(x2, n2); |
|||
break; // continue down the tree |
|||
} |
|||
j++; |
|||
} |
|||
i += j; // advance past part in common |
|||
n = n2; // continue down the tree |
|||
} |
|||
} |
|||
public void visualize() { |
|||
if (nodes.isEmpty()) { |
|||
System.out.println("<empty>"); |
|||
return; |
|||
} |
|||
visualize_f(0, ""); |
|||
} |
|||
private void visualize_f(int n, String pre) { |
|||
List<Integer> children = nodes.get(n).ch; |
|||
if (children.isEmpty()) { |
|||
System.out.println("- " + nodes.get(n).sub); |
|||
return; |
|||
} |
|||
System.out.println("┐ " + nodes.get(n).sub); |
|||
for (int i = 0; i < children.size() - 1; i++) { |
|||
Integer c = children.get(i); |
|||
System.out.print(pre + "├─"); |
|||
visualize_f(c, pre + "│ "); |
|||
} |
|||
System.out.print(pre + "└─"); |
|||
visualize_f(children.get(children.size() - 1), pre + " "); |
|||
} |
|||
} |
|||
public static void main(String[] args) { |
|||
new SuffixTree("banana$").visualize(); |
|||
} |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre>┐ |
|||
├─- banana$ |
|||
├─┐ a |
|||
│ ├─┐ na |
|||
│ │ ├─- na$ |
|||
│ │ └─- $ |
|||
│ └─- $ |
|||
├─┐ na |
|||
│ ├─- na$ |
|||
│ └─- $ |
|||
└─- $</pre> |
|||
=={{header|JavaScript}}== |
|||
{{trans|Java}} |
|||
<syntaxhighlight lang="javascript">class Node { |
|||
sub = ''; // a substring of the input string |
|||
children = []; // list of child nodes |
|||
} |
|||
class SuffixTree { |
|||
nodes = []; |
|||
constructor(str) { |
|||
this.nodes.push(new Node()); |
|||
for (let i = 0; i < str.length; ++i) { |
|||
this.addSuffix(str.slice(i)); |
|||
} |
|||
} |
|||
addSuffix(suf) { |
|||
let n = 0; |
|||
let i = 0; |
|||
while (i < suf.length) { |
|||
const b = suf.charAt(i); |
|||
const children = this.nodes[n].children; |
|||
let x2 = 0; |
|||
let n2; |
|||
while (true) { |
|||
if (x2 === children.length) { |
|||
// no matching child, remainder of suf becomes new node. |
|||
n2 = this.nodes.length; |
|||
const temp = new Node(); |
|||
temp.sub = suf.slice(i); |
|||
this.nodes.push(temp); |
|||
children.push(n2); |
|||
return; |
|||
} |
|||
n2 = children[x2]; |
|||
if (this.nodes[n2].sub.charAt(0) === b) break; |
|||
x2++; |
|||
} |
|||
// find prefix of remaining suffix in common with child |
|||
const sub2 = this.nodes[n2].sub; |
|||
let j = 0; |
|||
while (j < sub2.length) { |
|||
if (suf.charAt(i + j) !== sub2.charAt(j)) { |
|||
// split n2 |
|||
const n3 = n2; |
|||
// new node for the part in common |
|||
n2 = this.nodes.length; |
|||
const temp = new Node(); |
|||
temp.sub = sub2.slice(0, j); |
|||
temp.children.push(n3); |
|||
this.nodes.push(temp); |
|||
this.nodes[n3].sub = sub2.slice(j); // old node loses the part in common |
|||
this.nodes[n].children[x2] = n2; |
|||
break; // continue down the tree |
|||
} |
|||
j++; |
|||
} |
|||
i += j; // advance past part in common |
|||
n = n2; // continue down the tree |
|||
} |
|||
} |
|||
toString() { |
|||
if (this.nodes.length === 0) { |
|||
return '<empty>'; |
|||
} |
|||
return this.toString_f(0, ''); |
|||
} |
|||
toString_f(n, pre) { |
|||
const children = this.nodes[n].children; |
|||
if (children.length === 0) { |
|||
return '- ' + this.nodes[n].sub + '\n'; |
|||
} |
|||
let s = '┐ ' + this.nodes[n].sub + '\n'; |
|||
for (let i = 0; i < children.length - 1; i++) { |
|||
const c = children[i]; |
|||
s += pre + '├─'; |
|||
s += this.toString_f(c, pre + '│ '); |
|||
} |
|||
s += pre + '└─'; |
|||
s += this.toString_f(children[children.length - 1], pre + ' '); |
|||
return s; |
|||
} |
|||
} |
|||
const st = new SuffixTree('banana'); |
|||
console.log(st.toString());</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
┐ |
|||
├─- banana |
|||
├─┐ a |
|||
│ └─┐ na |
|||
│ └─- na |
|||
└─┐ na |
|||
└─- na |
|||
</pre> |
|||
=={{header|Julia}}== |
|||
{{trans|Go}} |
|||
<syntaxhighlight lang="julia">import Base.print |
|||
mutable struct Node |
|||
sub::String |
|||
ch::Vector{Int} |
|||
Node(str, v=Int[]) = new(str, v) |
|||
end |
|||
struct SuffixTree |
|||
nodes::Vector{Node} |
|||
function SuffixTree(s::String) |
|||
nod = [Node("", Int[])] |
|||
for i in 1:length(s) |
|||
addSuffix!(nod, s[i:end]) |
|||
end |
|||
return new(nod) |
|||
end |
|||
end |
|||
function addSuffix!(tree::Vector{Node}, suf::String) |
|||
n, i = 1, 1 |
|||
while i <= length(suf) |
|||
x2, n2, b = 1, 1, suf[i] |
|||
while true |
|||
children = tree[n].ch |
|||
if x2 > length(children) |
|||
push!(tree, Node(suf[i:end])) |
|||
push!(tree[n].ch, length(tree)) |
|||
return |
|||
end |
|||
n2 = children[x2] |
|||
(tree[n2].sub[1] == b) && break |
|||
x2 += 1 |
|||
end |
|||
sub2, j = tree[n2].sub, 0 |
|||
while j < length(sub2) |
|||
if suf[i + j] != sub2[j + 1] |
|||
push!(tree, Node(sub2[1:j], [n2])) |
|||
tree[n2].sub = sub2[j+1:end] |
|||
n2 = length(tree) |
|||
tree[n].ch[x2] = n2 |
|||
break |
|||
end |
|||
j += 1 |
|||
end |
|||
i += j |
|||
n = n2 |
|||
end |
|||
end |
|||
function Base.print(io::IO, suffixtree::SuffixTree) |
|||
function treeprint(n::Int, pre::String) |
|||
children = suffixtree.nodes[n].ch |
|||
if isempty(children) |
|||
println("╴ ", suffixtree.nodes[n].sub) |
|||
else |
|||
println("┐ ", suffixtree.nodes[n].sub) |
|||
for c in children[1:end-1] |
|||
print(pre, "├─") |
|||
treeprint(c, pre * "│ ") |
|||
end |
|||
print(pre, "└─") |
|||
treeprint(children[end], pre * " ") |
|||
end |
|||
end |
|||
if isempty(suffixtree.nodes) |
|||
println("<empty>") |
|||
else |
|||
treeprint(1, "") |
|||
end |
|||
end |
|||
println(SuffixTree("banana\$")) |
|||
</syntaxhighlight> {{out}} |
|||
<pre> |
|||
┐ |
|||
├─╴ banana$ |
|||
├─┐ a |
|||
│ ├─┐ na |
|||
│ │ ├─╴ na$ |
|||
│ │ └─╴ $ |
|||
│ └─╴ $ |
|||
├─┐ na |
|||
│ ├─╴ na$ |
|||
│ └─╴ $ |
|||
└─╴ $ |
|||
</pre> |
|||
=={{header|Kotlin}}== |
=={{header|Kotlin}}== |
||
{{trans|Go}} |
{{trans|Go}} |
||
< |
<syntaxhighlight lang="scala">// version 1.1.3 |
||
class Node { |
class Node { |
||
Line 380: | Line 1,093: | ||
fun main(args: Array<String>) { |
fun main(args: Array<String>) { |
||
SuffixTree("banana$").visualize() |
SuffixTree("banana$").visualize() |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 396: | Line 1,109: | ||
└─╴ $ |
└─╴ $ |
||
</pre> |
</pre> |
||
=={{header|Nim}}== |
|||
{{trans|Go}} |
|||
<syntaxhighlight lang="nim">type |
|||
Tree = seq[Node] |
|||
Node = object |
|||
sub: string # a substring of the input string. |
|||
ch: seq[int] # list of child nodes. |
|||
proc addSuffix(t: var Tree; suf: string) = |
|||
var n, i = 0 |
|||
while i < suf.len: |
|||
let b = suf[i] |
|||
let ch = t[n].ch |
|||
var x2, n2: int |
|||
while true: |
|||
if x2 == ch.len: |
|||
# No matching child, remainder of "suf" becomes new node. |
|||
n2 = t.len |
|||
t.add Node(sub: suf[i..^1]) |
|||
t[n].ch.add n2 |
|||
return |
|||
n2 = ch[x2] |
|||
if t[n2].sub[0] == b: break |
|||
inc x2 |
|||
# Find prefix of remaining suffix in common with child. |
|||
let sub2 = t[n2].sub |
|||
var j = 0 |
|||
while j < sub2.len: |
|||
if suf[i+j] != sub2[j]: |
|||
# Split "sub2". |
|||
let n3 = n2 |
|||
# New node for the part in common. |
|||
n2 = t.len |
|||
t.add Node(sub: sub2[0..<j], ch: @[n3]) |
|||
t[n3].sub = sub2[j..^1] # Old node loses the part in common. |
|||
t[n].ch[x2] = n2 |
|||
break # Continue down the tree. |
|||
inc j |
|||
inc i, j # Advance past part in common. |
|||
n = n2 # Continue down the tree. |
|||
func newTree(s: string): Tree = |
|||
result.add Node() # root node. |
|||
for i in 0..s.high: |
|||
result.addSuffix s[i..^1] |
|||
proc vis(t: Tree) = |
|||
if t.len == 0: |
|||
echo "<empty>" |
|||
return |
|||
proc f(n: int; pre: string) = |
|||
let children = t[n].ch |
|||
if children.len == 0: |
|||
echo "╴", t[n].sub |
|||
return |
|||
echo "┐", t[n].sub |
|||
for i in 0..<children.high: |
|||
stdout.write pre, "├─" |
|||
f(children[i], pre & "│ ") |
|||
stdout.write pre, "└─" |
|||
f(children[^1], pre & " ") |
|||
f(0, "") |
|||
newTree("banana$").vis()</syntaxhighlight> |
|||
{{out}} |
|||
<pre>┐ |
|||
├─╴banana$ |
|||
├─┐a |
|||
│ ├─┐na |
|||
│ │ ├─╴na$ |
|||
│ │ └─╴$ |
|||
│ └─╴$ |
|||
├─┐na |
|||
│ ├─╴na$ |
|||
│ └─╴$ |
|||
└─╴$</pre> |
|||
=={{header|Perl}}== |
=={{header|Perl}}== |
||
{{trans| |
{{trans|Raku}} |
||
< |
<syntaxhighlight lang="perl">use strict; |
||
use warnings; |
use warnings; |
||
use Data::Dumper; |
use Data::Dumper; |
||
Line 429: | Line 1,229: | ||
return $h; |
return $h; |
||
} |
} |
||
print +Dumper suffix_tree suffixes 'banana$';</ |
print +Dumper suffix_tree suffixes 'banana$';</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>$VAR1 = { |
<pre>$VAR1 = { |
||
Line 447: | Line 1,247: | ||
};</pre> |
};</pre> |
||
=={{header| |
=={{header|Phix}}== |
||
{{trans|D}} |
|||
{{Works with|rakudo|2017-01}} |
|||
<!--<syntaxhighlight lang="phix">(phixonline)--> |
|||
Here is quite a naive algorithm, probably <math>O(n^2)</math>. |
|||
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
|||
<span style="color: #000080;font-style:italic;">-- tree nodes are simply {string substr, sequence children_idx}</span> |
|||
<span style="color: #008080;">enum</span> <span style="color: #000000;">SUB</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">=</span><span style="color: #000000;">2</span> |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">addSuffix</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">string</span> <span style="color: #000000;">suffix</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #004080;">int</span> <span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><=</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">suffix</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">ch</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">suffix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">x2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">n2</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #0000FF;">(</span><span style="color: #004600;">true</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">x2</span><span style="color: #0000FF;">></span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000080;font-style:italic;">-- no matching child, remainder of suffix becomes new node.</span> |
|||
<span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">suffix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..$],{}})</span> |
|||
<span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">deep_copy</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">)&</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">n2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">x2</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n2</span><span style="color: #0000FF;">][</span><span style="color: #000000;">SUB</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]==</span><span style="color: #000000;">ch</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">x2</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #000080;font-style:italic;">-- find prefix of remaining suffix in common with child</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">prefix</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n2</span><span style="color: #0000FF;">][</span><span style="color: #000000;">SUB</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #004080;">int</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">prefix</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">suffix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">prefix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000080;font-style:italic;">-- split n2: new node for the part in common</span> |
|||
<span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">prefix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">j</span><span style="color: #0000FF;">],{</span><span style="color: #000000;">n2</span><span style="color: #0000FF;">}})</span> |
|||
<span style="color: #000080;font-style:italic;">-- old node loses the part in common</span> |
|||
<span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n2</span><span style="color: #0000FF;">][</span><span style="color: #000000;">SUB</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">prefix</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..$]</span> |
|||
<span style="color: #000080;font-style:italic;">-- and child idx moves to newly created node</span> |
|||
<span style="color: #000000;">n2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">deep_copy</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">])</span> |
|||
<span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">x2</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">n2</span> |
|||
<span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">children</span> |
|||
<span style="color: #008080;">exit</span> <span style="color: #000080;font-style:italic;">-- continue down the tree</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">j</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">j</span> <span style="color: #000080;font-style:italic;">-- advance past part in common</span> |
|||
<span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">n2</span> <span style="color: #000080;font-style:italic;">-- continue down the tree</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">SuffixTree</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{{</span><span style="color: #008000;">""</span><span style="color: #0000FF;">,{}}}</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">addSuffix</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">..$])</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">t</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #008080;">procedure</span> <span style="color: #000000;">visualize</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">n</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">string</span> <span style="color: #000000;">pre</span><span style="color: #0000FF;">=</span><span style="color: #008000;">""</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">)=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"<empty>\n"</span><span style="color: #0000FF;">);</span> |
|||
<span style="color: #008080;">return</span><span style="color: #0000FF;">;</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">children</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">CHILDREN</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">)=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"- %s\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">SUB</span><span style="color: #0000FF;">]})</span> |
|||
<span style="color: #008080;">return</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"+ %s\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">n</span><span style="color: #0000FF;">][</span><span style="color: #000000;">SUB</span><span style="color: #0000FF;">]})</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">l</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">children</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">l</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #7060A8;">puts</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">pre</span><span style="color: #0000FF;">&</span><span style="color: #008000;">"+-"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">visualize</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">,</span><span style="color: #000000;">children</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">pre</span><span style="color: #0000FF;">&</span><span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">l</span><span style="color: #0000FF;">?</span><span style="color: #008000;">" "</span><span style="color: #0000FF;">:</span><span style="color: #008000;">"| "</span><span style="color: #0000FF;">))</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">SuffixTree</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"banana$"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">visualize</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">)</span> |
|||
<!--</syntaxhighlight>--> |
|||
{{out}} |
|||
<pre> |
|||
+ |
|||
+-- banana$ |
|||
+-+ a |
|||
| +-+ na |
|||
| | +-- na$ |
|||
| | +-- $ |
|||
| +-- $ |
|||
+-+ na |
|||
| +-- na$ |
|||
| +-- $ |
|||
+-- $ |
|||
</pre> |
|||
=={{header|Python}}== |
|||
<lang Perl 6>multi suffix-tree(Str $str) { suffix-tree flat map &flip, [\~] $str.flip.comb } |
|||
{{trans|D}} |
|||
multi suffix-tree(@a) { |
|||
<syntaxhighlight lang="python">class Node: |
|||
hash |
|||
def __init__(self, sub="", children=None): |
|||
@a == 0 ?? () !! |
|||
self.sub = sub |
|||
self.ch = children or [] |
|||
gather for @a.classify(*.substr(0, 1)) { |
|||
my $subtree = suffix-tree(grep *.chars, map *.substr(1), .value[]); |
|||
if $subtree == 1 { |
|||
my $pair = $subtree.pick; |
|||
take .key ~ $pair.key => $pair.value; |
|||
} else { |
|||
take .key => $subtree; |
|||
} |
|||
} |
|||
}</lang> |
|||
class SuffixTree: |
|||
Displaying the tree is done with the code from [[visualize a tree]]: |
|||
def __init__(self, str): |
|||
<lang Perl 6>my $tree = root => suffix-tree 'banana$'; |
|||
self.nodes = [Node()] |
|||
.say for visualize-tree $tree, *.key, *.value.list;</lang> |
|||
for i in range(len(str)): |
|||
self.addSuffix(str[i:]) |
|||
def addSuffix(self, suf): |
|||
n = 0 |
|||
i = 0 |
|||
while i < len(suf): |
|||
b = suf[i] |
|||
x2 = 0 |
|||
while True: |
|||
children = self.nodes[n].ch |
|||
if x2 == len(children): |
|||
# no matching child, remainder of suf becomes new node |
|||
n2 = len(self.nodes) |
|||
self.nodes.append(Node(suf[i:], [])) |
|||
self.nodes[n].ch.append(n2) |
|||
return |
|||
n2 = children[x2] |
|||
if self.nodes[n2].sub[0] == b: |
|||
break |
|||
x2 = x2 + 1 |
|||
# find prefix of remaining suffix in common with child |
|||
sub2 = self.nodes[n2].sub |
|||
j = 0 |
|||
while j < len(sub2): |
|||
if suf[i + j] != sub2[j]: |
|||
# split n2 |
|||
n3 = n2 |
|||
# new node for the part in common |
|||
n2 = len(self.nodes) |
|||
self.nodes.append(Node(sub2[:j], [n3])) |
|||
self.nodes[n3].sub = sub2[j:] # old node loses the part in common |
|||
self.nodes[n].ch[x2] = n2 |
|||
break # continue down the tree |
|||
j = j + 1 |
|||
i = i + j # advance past part in common |
|||
n = n2 # continue down the tree |
|||
def visualize(self): |
|||
if len(self.nodes) == 0: |
|||
print "<empty>" |
|||
return |
|||
def f(n, pre): |
|||
children = self.nodes[n].ch |
|||
if len(children) == 0: |
|||
print "--", self.nodes[n].sub |
|||
return |
|||
print "+-", self.nodes[n].sub |
|||
for c in children[:-1]: |
|||
print pre, "+-", |
|||
f(c, pre + " | ") |
|||
print pre, "+-", |
|||
f(children[-1], pre + " ") |
|||
f(0, "") |
|||
SuffixTree("banana$").visualize()</syntaxhighlight> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre>+- |
||
+- -- banana$ |
|||
├─$ |
|||
+- +- a |
|||
├─a |
|||
| +- +- na |
|||
│ ├─$ |
|||
| | +- -- na$ |
|||
│ └─na |
|||
| | +- -- $ |
|||
│ ├─$ |
|||
| +- -- $ |
|||
│ └─na$ |
|||
+- +- na |
|||
├─na |
|||
| +- -- na$ |
|||
│ ├─$ |
|||
| +- -- $ |
|||
│ └─na$ |
|||
+- -- $</pre> |
|||
=={{header|Racket}}== |
=={{header|Racket}}== |
||
Line 488: | Line 1,424: | ||
by Danny Yoo for more information on how to use suffix trees in Racket. |
by Danny Yoo for more information on how to use suffix trees in Racket. |
||
< |
<syntaxhighlight lang="racket">#lang racket |
||
(require (planet dyoo/suffixtree)) |
(require (planet dyoo/suffixtree)) |
||
(define tree (make-tree)) |
(define tree (make-tree)) |
||
Line 504: | Line 1,440: | ||
((list c ct ...) (show-node c (string-append dpth " |")) (l ct))))) |
((list c ct ...) (show-node c (string-append dpth " |")) (l ct))))) |
||
(show-node (tree-root tree) "")</ |
(show-node (tree-root tree) "")</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 518: | Line 1,454: | ||
| `-- na$ |
| `-- na$ |
||
`-- banana$</pre> |
`-- banana$</pre> |
||
=={{header|Raku}}== |
|||
(formerly Perl 6) |
|||
{{Works with|Rakudo|2018.04}} |
|||
Here is quite a naive algorithm, probably <math>O(n^2)</math>. |
|||
The display code is a variant of the [[visualize_a_tree#Raku|visualize a tree]] task code. |
|||
<syntaxhighlight lang="raku" line>multi suffix-tree(Str $str) { suffix-tree flat map &flip, [\~] $str.flip.comb } |
|||
multi suffix-tree(@a) { |
|||
hash |
|||
@a == 0 ?? () !! |
|||
@a == 1 ?? ( @a[0] => [] ) !! |
|||
gather for @a.classify(*.substr(0, 1)) { |
|||
my $subtree = suffix-tree(grep *.chars, map *.substr(1), .value[]); |
|||
if $subtree == 1 { |
|||
my $pair = $subtree.pick; |
|||
take .key ~ $pair.key => $pair.value; |
|||
} else { |
|||
take .key => $subtree; |
|||
} |
|||
} |
|||
} |
|||
my $tree = root => suffix-tree 'banana$'; |
|||
.say for visualize-tree $tree, *.key, *.value.List; |
|||
sub visualize-tree($tree, &label, &children, |
|||
:$indent = '', |
|||
:@mid = ('├─', '│ '), |
|||
:@end = ('└─', ' '), |
|||
) { |
|||
sub visit($node, *@pre) { |
|||
gather { |
|||
take @pre[0] ~ $node.&label; |
|||
my @children = sort $node.&children; |
|||
my $end = @children.end; |
|||
for @children.kv -> $_, $child { |
|||
when $end { take visit($child, (@pre[1] X~ @end)) } |
|||
default { take visit($child, (@pre[1] X~ @mid)) } |
|||
} |
|||
} |
|||
} |
|||
flat visit($tree, $indent xx 2); |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre>root |
|||
├─$ |
|||
├─a |
|||
│ ├─$ |
|||
│ └─na |
|||
│ ├─$ |
|||
│ └─na$ |
|||
├─banana$ |
|||
└─na |
|||
├─$ |
|||
└─na$</pre> |
|||
=={{header|Sidef}}== |
=={{header|Sidef}}== |
||
{{trans| |
{{trans|Raku}} |
||
< |
<syntaxhighlight lang="ruby">func suffix_tree(Str t) { |
||
suffix_tree(^t.len -> map { t.substr(_) }) |
suffix_tree(^t.len -> map { t.substr(_) }) |
||
} |
} |
||
Line 545: | Line 1,540: | ||
} |
} |
||
say suffix_tree('banana$')</ |
say suffix_tree('banana$')</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 563: | Line 1,558: | ||
) |
) |
||
) |
) |
||
</pre> |
|||
=={{header|Wren}}== |
|||
{{trans|Kotlin}} |
|||
<syntaxhighlight lang="wren">class Node { |
|||
construct new() { |
|||
_sub = "" // a substring of the input string |
|||
_ch = [] // list of child nodes |
|||
} |
|||
sub { _sub } |
|||
ch { _ch } |
|||
sub=(s) { _sub = s } |
|||
} |
|||
class SuffixTree { |
|||
construct new(str) { |
|||
_nodes = [Node.new()] |
|||
for (i in 0...str.count) addSuffix_(str[i..-1]) |
|||
} |
|||
addSuffix_(suf) { |
|||
var n = 0 |
|||
var i = 0 |
|||
while (i < suf.count) { |
|||
var b = suf[i] |
|||
var children = _nodes[n].ch |
|||
var x2 = 0 |
|||
var n2 |
|||
while (true) { |
|||
if (x2 == children.count) { |
|||
// no matching child, remainder of suf becomes new node. |
|||
n2 = _nodes.count |
|||
var nd = Node.new() |
|||
nd.sub = suf[i..-1] |
|||
_nodes.add(nd) |
|||
children.add(n2) |
|||
return |
|||
} |
|||
n2 = children[x2] |
|||
if (_nodes[n2].sub[0] == b) break |
|||
x2 = x2 + 1 |
|||
} |
|||
// find prefix of remaining suffix in common with child |
|||
var sub2 = _nodes[n2].sub |
|||
var j = 0 |
|||
while (j < sub2.count) { |
|||
if (suf[i + j] != sub2[j]) { |
|||
// split n2 |
|||
var n3 = n2 |
|||
// new node for the part in common |
|||
n2 = _nodes.count |
|||
var nd = Node.new() |
|||
nd.sub = sub2[0...j] |
|||
nd.ch.add(n3) |
|||
_nodes.add(nd) |
|||
_nodes[n3].sub = sub2[j..-1] // old node loses the part in common |
|||
_nodes[n].ch[x2] = n2 |
|||
break // continue down the tree |
|||
} |
|||
j = j + 1 |
|||
} |
|||
i = i + j // advance past part in common |
|||
n = n2 // continue down the tree |
|||
} |
|||
} |
|||
visualize() { |
|||
if (_nodes.isEmpty) { |
|||
System.print("<empty>") |
|||
return |
|||
} |
|||
var f // recursive closure |
|||
f = Fn.new { |n, pre| |
|||
var children = _nodes[n].ch |
|||
if (children.isEmpty) { |
|||
System.print("╴ %(_nodes[n].sub)") |
|||
return |
|||
} |
|||
System.print("┐ %(_nodes[n].sub)") |
|||
for (c in children[0...-1]) { |
|||
System.write(pre + "├─") |
|||
f.call(c, pre + "│ ") |
|||
} |
|||
System.write(pre + "└─") |
|||
f.call(children[-1], pre + " ") |
|||
} |
|||
f.call(0, "") |
|||
} |
|||
} |
|||
SuffixTree.new("banana$").visualize()</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
┐ |
|||
├─╴ banana$ |
|||
├─┐ a |
|||
│ ├─┐ na |
|||
│ │ ├─╴ na$ |
|||
│ │ └─╴ $ |
|||
│ └─╴ $ |
|||
├─┐ na |
|||
│ ├─╴ na$ |
|||
│ └─╴ $ |
|||
└─╴ $ |
|||
</pre> |
</pre> |
Latest revision as of 09:05, 7 May 2024
A suffix tree is a data structure commonly used in string algorithms.
Given a string S of length n, its suffix tree is a tree T such that:
- T has exactly n leaves numbered from 1 to n.
- Except for the root, every internal node has at least two children.
- Each edge of T is labelled with a non-empty substring of S.
- No two edges starting out of a node can have string labels beginning with the same character.
- The string obtained by concatenating all the string labels found on the path from the root to leaf i spells out suffix S[i..n], for i from 1 to n.
Such a tree does not exist for all strings. To ensure existence, a character that is not found in S must be appended at its end. The character '$' is traditionally used for this purpose.
For this task, build and display the suffix tree of the string "banana$". Displaying the tree can be done using the code from the visualize a tree task, but any other convenient method is accepted.
There are several ways to implement the tree data structure, for instance how edges should be labelled. Latitude is given in this matter, but notice that a simple way to do it is to label each node with the label of the edge leading to it.
The computation time for an efficient algorithm should be , but such an algorithm might be difficult to implement. An easier, algorithm is accepted.
11l
T Node
String sub
[Int] ch
F (sub, children)
.sub = sub
.ch = children
T SuffixTree
nodes = [Node(‘’, [Int]())]
F (str)
L(i) 0 .< str.len
.addSuffix(str[i..])
F addSuffix(suf)
V n = 0
V i = 0
L i < suf.len
V b = suf[i]
V x2 = 0
Int n2
L
V children = .nodes[n].ch
I x2 == children.len
n2 = .nodes.len
.nodes.append(Node(suf[i..], [Int]()))
.nodes[n].ch.append(n2)
R
n2 = children[x2]
I .nodes[n2].sub[0] == b
L.break
x2 = x2 + 1
V sub2 = .nodes[n2].sub
V j = 0
L j < sub2.len
I suf[i + j] != sub2[j]
V n3 = n2
n2 = .nodes.len
.nodes.append(Node(sub2[0 .< j], [n3]))
.nodes[n3].sub = sub2[j..]
.nodes[n].ch[x2] = n2
L.break
j = j + 1
i = i + j
n = n2
F visualize()
I .nodes.empty
print(‘<empty>’)
R
F f(Int n, String pre) -> Void
V children = @.nodes[n].ch
I children.empty
print(‘-- ’(@.nodes[n].sub))
R
print(‘+- ’(@.nodes[n].sub))
L(c) children[0 .< (len)-1]
print(pre‘ +-’, end' ‘ ’)
@f(c, pre‘ | ’)
print(pre‘ +-’, end' ‘ ’)
@f(children.last, pre‘ ’)
f(0, ‘’)
SuffixTree(‘banana$’).visualize()
- Output:
+- +- -- banana$ +- +- a | +- +- na | | +- -- na$ | | +- -- $ | +- -- $ +- +- na | +- -- na$ | +- -- $ +- -- $
C#
using System;
using System.Collections.Generic;
namespace SuffixTree {
class Node {
public string sub; // a substring of the input string
public List<int> ch = new List<int>(); // vector of child nodes
public Node() {
sub = "";
}
public Node(string sub, params int[] children) {
this.sub = sub;
ch.AddRange(children);
}
}
class SuffixTree {
readonly List<Node> nodes = new List<Node>();
public SuffixTree(string str) {
nodes.Add(new Node());
for (int i = 0; i < str.Length; i++) {
AddSuffix(str.Substring(i));
}
}
public void Visualize() {
if (nodes.Count == 0) {
Console.WriteLine("<empty>");
return;
}
void f(int n, string pre) {
var children = nodes[n].ch;
if (children.Count == 0) {
Console.WriteLine("- {0}", nodes[n].sub);
return;
}
Console.WriteLine("+ {0}", nodes[n].sub);
var it = children.GetEnumerator();
if (it.MoveNext()) {
do {
var cit = it;
if (!cit.MoveNext()) break;
Console.Write("{0}+-", pre);
f(it.Current, pre + "| ");
} while (it.MoveNext());
}
Console.Write("{0}+-", pre);
f(children[children.Count-1], pre+" ");
}
f(0, "");
}
private void AddSuffix(string suf) {
int n = 0;
int i = 0;
while (i < suf.Length) {
char b = suf[i];
int x2 = 0;
int n2;
while (true) {
var children = nodes[n].ch;
if (x2 == children.Count) {
// no matching child, remainder of suf becomes new node
n2 = nodes.Count;
nodes.Add(new Node(suf.Substring(i)));
nodes[n].ch.Add(n2);
return;
}
n2 = children[x2];
if (nodes[n2].sub[0] == b) {
break;
}
x2++;
}
// find prefix of remaining suffix in common with child
var sub2 = nodes[n2].sub;
int j = 0;
while (j < sub2.Length) {
if (suf[i + j] != sub2[j]) {
// split n2
var n3 = n2;
// new node for the part in common
n2 = nodes.Count;
nodes.Add(new Node(sub2.Substring(0, j), n3));
nodes[n3].sub = sub2.Substring(j); // old node loses the part in common
nodes[n].ch[x2] = n2;
break; // continue down the tree
}
j++;
}
i += j; // advance past part in common
n = n2; // continue down the tree
}
}
}
class Program {
static void Main() {
new SuffixTree("banana$").Visualize();
}
}
}
- Output:
+ +-- banana$ +-+ a | +-+ na | | +-- na$ | | +-- $ | +-- $ +-+ na | +-- na$ | +-- $ +-- $
C++
#include <functional>
#include <iostream>
#include <vector>
struct Node {
std::string sub = ""; // a substring of the input string
std::vector<int> ch; // vector of child nodes
Node() {
// empty
}
Node(const std::string& sub, std::initializer_list<int> children) : sub(sub) {
ch.insert(ch.end(), children);
}
};
struct SuffixTree {
std::vector<Node> nodes;
SuffixTree(const std::string& str) {
nodes.push_back(Node{});
for (size_t i = 0; i < str.length(); i++) {
addSuffix(str.substr(i));
}
}
void visualize() {
if (nodes.size() == 0) {
std::cout << "<empty>\n";
return;
}
std::function<void(int, const std::string&)> f;
f = [&](int n, const std::string & pre) {
auto children = nodes[n].ch;
if (children.size() == 0) {
std::cout << "- " << nodes[n].sub << '\n';
return;
}
std::cout << "+ " << nodes[n].sub << '\n';
auto it = std::begin(children);
if (it != std::end(children)) do {
if (std::next(it) == std::end(children)) break;
std::cout << pre << "+-";
f(*it, pre + "| ");
it = std::next(it);
} while (true);
std::cout << pre << "+-";
f(children[children.size() - 1], pre + " ");
};
f(0, "");
}
private:
void addSuffix(const std::string & suf) {
int n = 0;
size_t i = 0;
while (i < suf.length()) {
char b = suf[i];
int x2 = 0;
int n2;
while (true) {
auto children = nodes[n].ch;
if (x2 == children.size()) {
// no matching child, remainder of suf becomes new node
n2 = nodes.size();
nodes.push_back(Node(suf.substr(i), {}));
nodes[n].ch.push_back(n2);
return;
}
n2 = children[x2];
if (nodes[n2].sub[0] == b) {
break;
}
x2++;
}
// find prefix of remaining suffix in common with child
auto sub2 = nodes[n2].sub;
size_t j = 0;
while (j < sub2.size()) {
if (suf[i + j] != sub2[j]) {
// split n2
auto n3 = n2;
// new node for the part in common
n2 = nodes.size();
nodes.push_back(Node(sub2.substr(0, j), { n3 }));
nodes[n3].sub = sub2.substr(j); // old node loses the part in common
nodes[n].ch[x2] = n2;
break; // continue down the tree
}
j++;
}
i += j; // advance past part in common
n = n2; // continue down the tree
}
}
};
int main() {
SuffixTree("banana$").visualize();
}
- Output:
+ +-- banana$ +-+ a | +-+ na | | +-- na$ | | +-- $ | +-- $ +-+ na | +-- na$ | +-- $ +-- $
D
import std.stdio;
struct Node {
string sub = ""; // a substring of the input string
int[] ch; // array of child nodes
this(string sub, int[] children ...) {
this.sub = sub;
ch = children;
}
}
struct SuffixTree {
Node[] nodes;
this(string str) {
nodes ~= Node();
for (int i=0; i<str.length; ++i) {
addSuffix(str[i..$]);
}
}
private void addSuffix(string suf) {
int n = 0;
int i = 0;
while (i < suf.length) {
char b = suf[i];
int x2 = 0;
int n2;
while (true) {
auto children = nodes[n].ch;
if (x2 == children.length) {
// no matching child, remainder of suf becomes new node.
n2 = nodes.length;
nodes ~= Node(suf[i..$]);
nodes[n].ch ~= n2;
return;
}
n2 = children[x2];
if (nodes[n2].sub[0] == b) {
break;
}
x2++;
}
// find prefix of remaining suffix in common with child
auto sub2 = nodes[n2].sub;
int j = 0;
while (j < sub2.length) {
if (suf[i + j] != sub2[j]) {
// split n2
auto n3 = n2;
// new node for the part in common
n2 = nodes.length;
nodes ~= Node(sub2[0..j], n3);
nodes[n3].sub = sub2[j..$]; // old node loses the part in common
nodes[n].ch[x2] = n2;
break; // continue down the tree
}
j++;
}
i += j; // advance past part in common
n = n2; // continue down the tree
}
}
void visualize() {
if (nodes.length == 0) {
writeln("<empty>");
return;
}
void f(int n, string pre) {
auto children = nodes[n].ch;
if (children.length == 0) {
writefln("╴ %s", nodes[n].sub);
return;
}
writefln("┐ %s", nodes[n].sub);
foreach (c; children[0..$-1]) {
write(pre, "├─");
f(c, pre ~ "│ ");
}
write(pre, "└─");
f(children[$-1], pre ~ " ");
}
f(0, "");
}
}
void main() {
SuffixTree("banana$").visualize();
}
- Output:
┐ ├─╴ banana$ ├─┐ a │ ├─┐ na │ │ ├─╴ na$ │ │ └─╴ $ │ └─╴ $ ├─┐ na │ ├─╴ na$ │ └─╴ $ └─╴ $
Elixir
defmodule STree do
defstruct branch: []
defp suffixes([]), do: []
defp suffixes(w), do: [w | suffixes tl(w)]
defp lcp([], _, acc), do: acc
defp lcp(_, [], acc), do: acc
defp lcp([c | u], [a | w], acc) do
if c == a do
lcp(u, w, acc + 1)
else acc
end
end
defp g([], aw), do: [{{aw, length aw}, nil}]
defp g(cusnes, aw) do
[cusn | es] = cusnes
{cus, node} = cusn
{cu, culen} = cus
cpl = case node do
nil -> lcp cu, aw, 0
_ -> lcp (Enum.take cu, culen), aw, 0
end
x = Enum.drop cu, cpl
xlen = culen - cpl
y = Enum.drop aw, cpl
ex = {{x, xlen}, node}
ey = {{y, length y}, nil}
cond do
hd(aw) > hd(cu) -> [cusn | g(es, aw)]
hd(aw) < hd(cu) -> [{{aw, length aw}, nil} | cusnes]
nil != node && xlen == 0 -> [{cus, insert_suffix(y, node)} | es]
hd(x) < hd(y) -> [{{cu, cpl}, %STree{branch: [ex, ey]}} | es]
true -> [{{cu, cpl}, %STree{branch: [ey, ex]}} | es]
end
end
defp insert_suffix(aw, node), do: %STree{branch: g(node.branch, aw)}
def naive_insertion(t), do: List.foldl(suffixes(t), %STree{}, &insert_suffix/2)
defp f(nil, _, label), do: IO.puts("╴ #{label}")
defp f(%STree{branch: children}, pre, label) do
IO.puts "┐ #{label}"
children
|> Enum.take(length(children) - 1)
|> Enum.each(fn c ->
IO.write(pre <> "├─")
{ws, len} = elem(c, 0)
f(elem(c, 1), pre <> "│ ", Enum.join(Enum.take ws, len))
end)
IO.write(pre <> "└─")
c = List.last(children)
{ws, len} = elem(c, 0)
f(elem(c, 1), pre <> " ", Enum.join(Enum.take ws, len))
end
def visualize(n), do: f(n, "", "")
def main do
"banana$"
|> String.graphemes
|> naive_insertion
|> visualize
end
end
- Output:
┐ ├─╴ $ ├─┐ a │ ├─╴ $ │ └─┐ na │ ├─╴ $ │ └─╴ na$ ├─╴ banana$ └─┐ na ├─╴ $ └─╴ na$
Go
Vis function from Visualize_a_tree#Unicode.
package main
import "fmt"
func main() {
vis(buildTree("banana$"))
}
type tree []node
type node struct {
sub string // a substring of the input string
ch []int // list of child nodes
}
func buildTree(s string) tree {
t := tree{node{}} // root node
for i := range s {
t = t.addSuffix(s[i:])
}
return t
}
func (t tree) addSuffix(suf string) tree {
n := 0
for i := 0; i < len(suf); {
b := suf[i]
ch := t[n].ch
var x2, n2 int
for ; ; x2++ {
if x2 == len(ch) {
// no matching child, remainder of suf becomes new node.
n2 = len(t)
t = append(t, node{sub: suf[i:]})
t[n].ch = append(t[n].ch, n2)
return t
}
n2 = ch[x2]
if t[n2].sub[0] == b {
break
}
}
// find prefix of remaining suffix in common with child
sub2 := t[n2].sub
j := 0
for ; j < len(sub2); j++ {
if suf[i+j] != sub2[j] {
// split n2
n3 := n2
// new node for the part in common
n2 = len(t)
t = append(t, node{sub2[:j], []int{n3}})
t[n3].sub = sub2[j:] // old node loses the part in common
t[n].ch[x2] = n2
break // continue down the tree
}
}
i += j // advance past part in common
n = n2 // continue down the tree
}
return t
}
func vis(t tree) {
if len(t) == 0 {
fmt.Println("<empty>")
return
}
var f func(int, string)
f = func(n int, pre string) {
children := t[n].ch
if len(children) == 0 {
fmt.Println("╴", t[n].sub)
return
}
fmt.Println("┐", t[n].sub)
last := len(children) - 1
for _, ch := range children[:last] {
fmt.Print(pre, "├─")
f(ch, pre+"│ ")
}
fmt.Print(pre, "└─")
f(children[last], pre+" ")
}
f(0, "")
}
- Output:
┐ ├─╴ banana$ ├─┐ a │ ├─┐ na │ │ ├─╴ na$ │ │ └─╴ $ │ └─╴ $ ├─┐ na │ ├─╴ na$ │ └─╴ $ └─╴ $
J
Implementation:
classify=: {.@> </. ]
build_tree=:3 :0
tree=. ,:_;_;''
if. 0=#y do. tree return.end.
if. 1=#y do. tree,(#;y);0;y return.end.
for_box.classify y do.
char=. {.>{.>box
subtree=. }.build_tree }.each>box
ndx=.I.0=1&{::"1 subtree
n=.#tree
if. 1=#ndx do.
counts=. 1 + 0&{::"1 subtree
parents=. (n-1) (+*]&*) 1&{::"1 subtree
edges=. (ndx}~ <@(char,ndx&{::)) 2&{"1 subtree
tree=. tree, counts;"0 1 parents;"0 edges
else.
tree=. tree,(__;0;,char),(1;n;0) + ::]&.>"1 subtree
end.
end.
)
suffix_tree=:3 :0
assert. -.({:e.}:)y
tree=. B=:|:build_tree <\. y
((1+#y)-each {.tree),}.tree
)
Task example:
suffix_tree 'banana$'
┌──┬───────┬─┬──┬───┬─┬─┬──┬───┬─┬─┐
│__│1 │_│_ │2 │4│6│_ │3 │5│7│
├──┼───────┼─┼──┼───┼─┼─┼──┼───┼─┼─┤
│_ │0 │0│2 │3 │3│2│0 │7 │7│0│
├──┼───────┼─┼──┼───┼─┼─┼──┼───┼─┼─┤
│ │banana$│a│na│na$│$│$│na│na$│$│$│
└──┴───────┴─┴──┴───┴─┴─┴──┴───┴─┴─┘
The first row is the leaf number (_ for internal nodes).
The second row is parent index (_ for root node).
The third row is the edge's substring (empty for root node).
Visualizing, using showtree and prefixing the substring leading to each leaf with the leaf number (in brackets):
fmttree=: ;@(1&{) showtree~ {: (,~ }.`('[','] ',~":)@.(_>|))each {.
fmttree suffix_tree 'banana$'
┌─ [1] banana$
│ ┌─ [2] na$
│ ┌─ na ────┴─ [4] $
────┼─ a ─────────┴─ [6] $
│ ┌─ [3] na$
├─ na ────────┴─ [5] $
└─ [7] $
Java
import java.util.ArrayList;
import java.util.List;
public class SuffixTreeProblem {
private static class Node {
String sub = ""; // a substring of the input string
List<Integer> ch = new ArrayList<>(); // list of child nodes
}
private static class SuffixTree {
private List<Node> nodes = new ArrayList<>();
public SuffixTree(String str) {
nodes.add(new Node());
for (int i = 0; i < str.length(); ++i) {
addSuffix(str.substring(i));
}
}
private void addSuffix(String suf) {
int n = 0;
int i = 0;
while (i < suf.length()) {
char b = suf.charAt(i);
List<Integer> children = nodes.get(n).ch;
int x2 = 0;
int n2;
while (true) {
if (x2 == children.size()) {
// no matching child, remainder of suf becomes new node.
n2 = nodes.size();
Node temp = new Node();
temp.sub = suf.substring(i);
nodes.add(temp);
children.add(n2);
return;
}
n2 = children.get(x2);
if (nodes.get(n2).sub.charAt(0) == b) break;
x2++;
}
// find prefix of remaining suffix in common with child
String sub2 = nodes.get(n2).sub;
int j = 0;
while (j < sub2.length()) {
if (suf.charAt(i + j) != sub2.charAt(j)) {
// split n2
int n3 = n2;
// new node for the part in common
n2 = nodes.size();
Node temp = new Node();
temp.sub = sub2.substring(0, j);
temp.ch.add(n3);
nodes.add(temp);
nodes.get(n3).sub = sub2.substring(j); // old node loses the part in common
nodes.get(n).ch.set(x2, n2);
break; // continue down the tree
}
j++;
}
i += j; // advance past part in common
n = n2; // continue down the tree
}
}
public void visualize() {
if (nodes.isEmpty()) {
System.out.println("<empty>");
return;
}
visualize_f(0, "");
}
private void visualize_f(int n, String pre) {
List<Integer> children = nodes.get(n).ch;
if (children.isEmpty()) {
System.out.println("- " + nodes.get(n).sub);
return;
}
System.out.println("┐ " + nodes.get(n).sub);
for (int i = 0; i < children.size() - 1; i++) {
Integer c = children.get(i);
System.out.print(pre + "├─");
visualize_f(c, pre + "│ ");
}
System.out.print(pre + "└─");
visualize_f(children.get(children.size() - 1), pre + " ");
}
}
public static void main(String[] args) {
new SuffixTree("banana$").visualize();
}
}
- Output:
┐ ├─- banana$ ├─┐ a │ ├─┐ na │ │ ├─- na$ │ │ └─- $ │ └─- $ ├─┐ na │ ├─- na$ │ └─- $ └─- $
JavaScript
class Node {
sub = ''; // a substring of the input string
children = []; // list of child nodes
}
class SuffixTree {
nodes = [];
constructor(str) {
this.nodes.push(new Node());
for (let i = 0; i < str.length; ++i) {
this.addSuffix(str.slice(i));
}
}
addSuffix(suf) {
let n = 0;
let i = 0;
while (i < suf.length) {
const b = suf.charAt(i);
const children = this.nodes[n].children;
let x2 = 0;
let n2;
while (true) {
if (x2 === children.length) {
// no matching child, remainder of suf becomes new node.
n2 = this.nodes.length;
const temp = new Node();
temp.sub = suf.slice(i);
this.nodes.push(temp);
children.push(n2);
return;
}
n2 = children[x2];
if (this.nodes[n2].sub.charAt(0) === b) break;
x2++;
}
// find prefix of remaining suffix in common with child
const sub2 = this.nodes[n2].sub;
let j = 0;
while (j < sub2.length) {
if (suf.charAt(i + j) !== sub2.charAt(j)) {
// split n2
const n3 = n2;
// new node for the part in common
n2 = this.nodes.length;
const temp = new Node();
temp.sub = sub2.slice(0, j);
temp.children.push(n3);
this.nodes.push(temp);
this.nodes[n3].sub = sub2.slice(j); // old node loses the part in common
this.nodes[n].children[x2] = n2;
break; // continue down the tree
}
j++;
}
i += j; // advance past part in common
n = n2; // continue down the tree
}
}
toString() {
if (this.nodes.length === 0) {
return '<empty>';
}
return this.toString_f(0, '');
}
toString_f(n, pre) {
const children = this.nodes[n].children;
if (children.length === 0) {
return '- ' + this.nodes[n].sub + '\n';
}
let s = '┐ ' + this.nodes[n].sub + '\n';
for (let i = 0; i < children.length - 1; i++) {
const c = children[i];
s += pre + '├─';
s += this.toString_f(c, pre + '│ ');
}
s += pre + '└─';
s += this.toString_f(children[children.length - 1], pre + ' ');
return s;
}
}
const st = new SuffixTree('banana');
console.log(st.toString());
- Output:
┐ ├─- banana ├─┐ a │ └─┐ na │ └─- na └─┐ na └─- na
Julia
import Base.print
mutable struct Node
sub::String
ch::Vector{Int}
Node(str, v=Int[]) = new(str, v)
end
struct SuffixTree
nodes::Vector{Node}
function SuffixTree(s::String)
nod = [Node("", Int[])]
for i in 1:length(s)
addSuffix!(nod, s[i:end])
end
return new(nod)
end
end
function addSuffix!(tree::Vector{Node}, suf::String)
n, i = 1, 1
while i <= length(suf)
x2, n2, b = 1, 1, suf[i]
while true
children = tree[n].ch
if x2 > length(children)
push!(tree, Node(suf[i:end]))
push!(tree[n].ch, length(tree))
return
end
n2 = children[x2]
(tree[n2].sub[1] == b) && break
x2 += 1
end
sub2, j = tree[n2].sub, 0
while j < length(sub2)
if suf[i + j] != sub2[j + 1]
push!(tree, Node(sub2[1:j], [n2]))
tree[n2].sub = sub2[j+1:end]
n2 = length(tree)
tree[n].ch[x2] = n2
break
end
j += 1
end
i += j
n = n2
end
end
function Base.print(io::IO, suffixtree::SuffixTree)
function treeprint(n::Int, pre::String)
children = suffixtree.nodes[n].ch
if isempty(children)
println("╴ ", suffixtree.nodes[n].sub)
else
println("┐ ", suffixtree.nodes[n].sub)
for c in children[1:end-1]
print(pre, "├─")
treeprint(c, pre * "│ ")
end
print(pre, "└─")
treeprint(children[end], pre * " ")
end
end
if isempty(suffixtree.nodes)
println("<empty>")
else
treeprint(1, "")
end
end
println(SuffixTree("banana\$"))
- Output:
┐ ├─╴ banana$ ├─┐ a │ ├─┐ na │ │ ├─╴ na$ │ │ └─╴ $ │ └─╴ $ ├─┐ na │ ├─╴ na$ │ └─╴ $ └─╴ $
Kotlin
// version 1.1.3
class Node {
var sub = "" // a substring of the input string
var ch = mutableListOf<Int>() // list of child nodes
}
class SuffixTree(val str: String) {
val nodes = mutableListOf<Node>(Node())
init {
for (i in 0 until str.length) addSuffix(str.substring(i))
}
private fun addSuffix(suf: String) {
var n = 0
var i = 0
while (i < suf.length) {
val b = suf[i]
val children = nodes[n].ch
var x2 = 0
var n2: Int
while (true) {
if (x2 == children.size) {
// no matching child, remainder of suf becomes new node.
n2 = nodes.size
nodes.add(Node().apply { sub = suf.substring(i) } )
children.add(n2)
return
}
n2 = children[x2]
if (nodes[n2].sub[0] == b) break
x2++
}
// find prefix of remaining suffix in common with child
val sub2 = nodes[n2].sub
var j = 0
while (j < sub2.length) {
if (suf[i + j] != sub2[j]) {
// split n2
val n3 = n2
// new node for the part in common
n2 = nodes.size
nodes.add(Node().apply {
sub = sub2.substring(0, j)
ch.add(n3)
})
nodes[n3].sub = sub2.substring(j) // old node loses the part in common
nodes[n].ch[x2] = n2
break // continue down the tree
}
j++
}
i += j // advance past part in common
n = n2 // continue down the tree
}
}
fun visualize() {
if (nodes.isEmpty()) {
println("<empty>")
return
}
fun f(n: Int, pre: String) {
val children = nodes[n].ch
if (children.isEmpty()) {
println("╴ ${nodes[n].sub}")
return
}
println("┐ ${nodes[n].sub}")
for (c in children.dropLast(1)) {
print(pre + "├─")
f(c, pre + "│ ")
}
print(pre + "└─")
f(children.last(), pre + " ")
}
f(0, "")
}
}
fun main(args: Array<String>) {
SuffixTree("banana$").visualize()
}
- Output:
┐ ├─╴ banana$ ├─┐ a │ ├─┐ na │ │ ├─╴ na$ │ │ └─╴ $ │ └─╴ $ ├─┐ na │ ├─╴ na$ │ └─╴ $ └─╴ $
Nim
type
Tree = seq[Node]
Node = object
sub: string # a substring of the input string.
ch: seq[int] # list of child nodes.
proc addSuffix(t: var Tree; suf: string) =
var n, i = 0
while i < suf.len:
let b = suf[i]
let ch = t[n].ch
var x2, n2: int
while true:
if x2 == ch.len:
# No matching child, remainder of "suf" becomes new node.
n2 = t.len
t.add Node(sub: suf[i..^1])
t[n].ch.add n2
return
n2 = ch[x2]
if t[n2].sub[0] == b: break
inc x2
# Find prefix of remaining suffix in common with child.
let sub2 = t[n2].sub
var j = 0
while j < sub2.len:
if suf[i+j] != sub2[j]:
# Split "sub2".
let n3 = n2
# New node for the part in common.
n2 = t.len
t.add Node(sub: sub2[0..<j], ch: @[n3])
t[n3].sub = sub2[j..^1] # Old node loses the part in common.
t[n].ch[x2] = n2
break # Continue down the tree.
inc j
inc i, j # Advance past part in common.
n = n2 # Continue down the tree.
func newTree(s: string): Tree =
result.add Node() # root node.
for i in 0..s.high:
result.addSuffix s[i..^1]
proc vis(t: Tree) =
if t.len == 0:
echo "<empty>"
return
proc f(n: int; pre: string) =
let children = t[n].ch
if children.len == 0:
echo "╴", t[n].sub
return
echo "┐", t[n].sub
for i in 0..<children.high:
stdout.write pre, "├─"
f(children[i], pre & "│ ")
stdout.write pre, "└─"
f(children[^1], pre & " ")
f(0, "")
newTree("banana$").vis()
- Output:
┐ ├─╴banana$ ├─┐a │ ├─┐na │ │ ├─╴na$ │ │ └─╴$ │ └─╴$ ├─┐na │ ├─╴na$ │ └─╴$ └─╴$
Perl
use strict;
use warnings;
use Data::Dumper;
sub classify {
my $h = {};
for (@_) { push @{$h->{substr($_,0,1)}}, $_ }
return $h;
}
sub suffixes {
my $str = shift;
map { substr $str, $_ } 0 .. length($str) - 1;
}
sub suffix_tree {
return +{} if @_ == 0;
return +{ $_[0] => +{} } if @_ == 1;
my $h = {};
my $classif = classify @_;
for my $key (keys %$classif) {
my $subtree = suffix_tree(
map { substr $_, 1 } @{$classif->{$key}}
);
my @subkeys = keys %$subtree;
if (@subkeys == 1) {
my ($subkey) = @subkeys;
$h->{"$key$subkey"} = $subtree->{$subkey};
} else { $h->{$key} = $subtree }
}
return $h;
}
print +Dumper suffix_tree suffixes 'banana$';
- Output:
$VAR1 = { '$' => {}, 'a' => { '$' => {}, 'na' => { 'na$' => {}, '$' => {} } }, 'banana$' => {}, 'na' => { 'na$' => {}, '$' => {} } };
Phix
with javascript_semantics -- tree nodes are simply {string substr, sequence children_idx} enum SUB=1, CHILDREN=2 function addSuffix(sequence t, string suffix) int n = 1, i = 1 while i<=length(suffix) do integer ch = suffix[i], x2 = 1, n2 while (true) do sequence children = t[n][CHILDREN] if x2>length(children) then -- no matching child, remainder of suffix becomes new node. t = append(t,{suffix[i..$],{}}) t[n][CHILDREN] = deep_copy(children)&length(t) return t end if n2 = children[x2] if t[n2][SUB][1]==ch then exit end if x2 += 1 end while -- find prefix of remaining suffix in common with child string prefix = t[n2][SUB] int j = 0 while j<length(prefix) do if suffix[i+j]!=prefix[j+1] then -- split n2: new node for the part in common t = append(t,{prefix[1..j],{n2}}) -- old node loses the part in common t[n2][SUB] = prefix[j+1..$] -- and child idx moves to newly created node n2 = length(t) sequence children = deep_copy(t[n][CHILDREN]) children[x2] = n2 t[n][CHILDREN] = children exit -- continue down the tree end if j += 1 end while i += j -- advance past part in common n = n2 -- continue down the tree end while return t end function function SuffixTree(string s) sequence t = {{"",{}}} for i=1 to length(s) do t = addSuffix(t,s[i..$]) end for return t end function procedure visualize(sequence t, integer n=1, string pre="") if length(t)=0 then printf(1,"<empty>\n"); return; end if sequence children = t[n][CHILDREN] if length(children)=0 then printf(1,"- %s\n", {t[n][SUB]}) return end if printf(1,"+ %s\n", {t[n][SUB]}) integer l = length(children) for i=1 to l do puts(1,pre&"+-") visualize(t,children[i],pre&iff(i=l?" ":"| ")) end for end procedure sequence t = SuffixTree("banana$") visualize(t)
- Output:
+ +-- banana$ +-+ a | +-+ na | | +-- na$ | | +-- $ | +-- $ +-+ na | +-- na$ | +-- $ +-- $
Python
class Node:
def __init__(self, sub="", children=None):
self.sub = sub
self.ch = children or []
class SuffixTree:
def __init__(self, str):
self.nodes = [Node()]
for i in range(len(str)):
self.addSuffix(str[i:])
def addSuffix(self, suf):
n = 0
i = 0
while i < len(suf):
b = suf[i]
x2 = 0
while True:
children = self.nodes[n].ch
if x2 == len(children):
# no matching child, remainder of suf becomes new node
n2 = len(self.nodes)
self.nodes.append(Node(suf[i:], []))
self.nodes[n].ch.append(n2)
return
n2 = children[x2]
if self.nodes[n2].sub[0] == b:
break
x2 = x2 + 1
# find prefix of remaining suffix in common with child
sub2 = self.nodes[n2].sub
j = 0
while j < len(sub2):
if suf[i + j] != sub2[j]:
# split n2
n3 = n2
# new node for the part in common
n2 = len(self.nodes)
self.nodes.append(Node(sub2[:j], [n3]))
self.nodes[n3].sub = sub2[j:] # old node loses the part in common
self.nodes[n].ch[x2] = n2
break # continue down the tree
j = j + 1
i = i + j # advance past part in common
n = n2 # continue down the tree
def visualize(self):
if len(self.nodes) == 0:
print "<empty>"
return
def f(n, pre):
children = self.nodes[n].ch
if len(children) == 0:
print "--", self.nodes[n].sub
return
print "+-", self.nodes[n].sub
for c in children[:-1]:
print pre, "+-",
f(c, pre + " | ")
print pre, "+-",
f(children[-1], pre + " ")
f(0, "")
SuffixTree("banana$").visualize()
- Output:
+- +- -- banana$ +- +- a | +- +- na | | +- -- na$ | | +- -- $ | +- -- $ +- +- na | +- -- na$ | +- -- $ +- -- $
Racket
See Suffix trees with Ukkonen’s algorithm by Danny Yoo for more information on how to use suffix trees in Racket.
#lang racket
(require (planet dyoo/suffixtree))
(define tree (make-tree))
(tree-add! tree (string->label "banana$"))
(define (show-node nd dpth)
(define children (node-children nd))
(printf "~a~a ~a~%" (match dpth
[(regexp #px"(.*) $" (list _ d)) (string-append d "`")]
[else else]) (if (null? children) "--" "-+") (label->string (node-up-label nd)))
(let l ((children children))
(match children
((list) (void))
((list c) (show-node c (string-append dpth " ")))
((list c ct ...) (show-node c (string-append dpth " |")) (l ct)))))
(show-node (tree-root tree) "")
- Output:
-+ |-- $ |-+ a | |-- $ | `-+ na | |-- $ | `-- na$ |-+ na | |-- $ | `-- na$ `-- banana$
Raku
(formerly Perl 6)
Here is quite a naive algorithm, probably .
The display code is a variant of the visualize a tree task code.
multi suffix-tree(Str $str) { suffix-tree flat map &flip, [\~] $str.flip.comb }
multi suffix-tree(@a) {
hash
@a == 0 ?? () !!
@a == 1 ?? ( @a[0] => [] ) !!
gather for @a.classify(*.substr(0, 1)) {
my $subtree = suffix-tree(grep *.chars, map *.substr(1), .value[]);
if $subtree == 1 {
my $pair = $subtree.pick;
take .key ~ $pair.key => $pair.value;
} else {
take .key => $subtree;
}
}
}
my $tree = root => suffix-tree 'banana$';
.say for visualize-tree $tree, *.key, *.value.List;
sub visualize-tree($tree, &label, &children,
:$indent = '',
:@mid = ('├─', '│ '),
:@end = ('└─', ' '),
) {
sub visit($node, *@pre) {
gather {
take @pre[0] ~ $node.&label;
my @children = sort $node.&children;
my $end = @children.end;
for @children.kv -> $_, $child {
when $end { take visit($child, (@pre[1] X~ @end)) }
default { take visit($child, (@pre[1] X~ @mid)) }
}
}
}
flat visit($tree, $indent xx 2);
}
- Output:
root ├─$ ├─a │ ├─$ │ └─na │ ├─$ │ └─na$ ├─banana$ └─na ├─$ └─na$
Sidef
func suffix_tree(Str t) {
suffix_tree(^t.len -> map { t.substr(_) })
}
func suffix_tree(a {.len == 1}) {
Hash(a[0] => nil)
}
func suffix_tree(Arr a) {
var h = Hash()
for k,v in (a.group_by { .char(0) }) {
var subtree = suffix_tree(v.map { .substr(1) })
var subkeys = subtree.keys
if (subkeys.len == 1) {
var subk = subkeys[0]
h{k + subk} = subtree{subk}
}
else {
h{k} = subtree
}
}
return h
}
say suffix_tree('banana$')
- Output:
Hash( "$" => nil, "a" => Hash( "$" => nil, "na" => Hash( "$" => nil, "na$" => nil ) ), "banana$" => nil, "na" => Hash( "$" => nil, "na$" => nil ) )
Wren
class Node {
construct new() {
_sub = "" // a substring of the input string
_ch = [] // list of child nodes
}
sub { _sub }
ch { _ch }
sub=(s) { _sub = s }
}
class SuffixTree {
construct new(str) {
_nodes = [Node.new()]
for (i in 0...str.count) addSuffix_(str[i..-1])
}
addSuffix_(suf) {
var n = 0
var i = 0
while (i < suf.count) {
var b = suf[i]
var children = _nodes[n].ch
var x2 = 0
var n2
while (true) {
if (x2 == children.count) {
// no matching child, remainder of suf becomes new node.
n2 = _nodes.count
var nd = Node.new()
nd.sub = suf[i..-1]
_nodes.add(nd)
children.add(n2)
return
}
n2 = children[x2]
if (_nodes[n2].sub[0] == b) break
x2 = x2 + 1
}
// find prefix of remaining suffix in common with child
var sub2 = _nodes[n2].sub
var j = 0
while (j < sub2.count) {
if (suf[i + j] != sub2[j]) {
// split n2
var n3 = n2
// new node for the part in common
n2 = _nodes.count
var nd = Node.new()
nd.sub = sub2[0...j]
nd.ch.add(n3)
_nodes.add(nd)
_nodes[n3].sub = sub2[j..-1] // old node loses the part in common
_nodes[n].ch[x2] = n2
break // continue down the tree
}
j = j + 1
}
i = i + j // advance past part in common
n = n2 // continue down the tree
}
}
visualize() {
if (_nodes.isEmpty) {
System.print("<empty>")
return
}
var f // recursive closure
f = Fn.new { |n, pre|
var children = _nodes[n].ch
if (children.isEmpty) {
System.print("╴ %(_nodes[n].sub)")
return
}
System.print("┐ %(_nodes[n].sub)")
for (c in children[0...-1]) {
System.write(pre + "├─")
f.call(c, pre + "│ ")
}
System.write(pre + "└─")
f.call(children[-1], pre + " ")
}
f.call(0, "")
}
}
SuffixTree.new("banana$").visualize()
- Output:
┐ ├─╴ banana$ ├─┐ a │ ├─┐ na │ │ ├─╴ na$ │ │ └─╴ $ │ └─╴ $ ├─┐ na │ ├─╴ na$ │ └─╴ $ └─╴ $