001/*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2012 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019 */
020package org.sonar.duplications.detector.suffixtree;
021
022import com.google.common.collect.Lists;
023
024import java.util.*;
025
026public final class Search {
027
028  private final SuffixTree tree;
029  private final TextSet text;
030  private final Collector reporter;
031
032  private final List<Integer> list = Lists.newArrayList();
033  private final List<Node> innerNodes = Lists.newArrayList();
034
035  public static void perform(TextSet text, Collector reporter) {
036    new Search(SuffixTree.create(text), text, reporter).compute();
037  }
038
039  private Search(SuffixTree tree, TextSet text, Collector reporter) {
040    this.tree = tree;
041    this.text = text;
042    this.reporter = reporter;
043  }
044
045  private void compute() {
046    // O(N)
047    dfs();
048
049    // O(N * log(N))
050    Collections.sort(innerNodes, DEPTH_COMPARATOR);
051
052    // O(N)
053    visitInnerNodes();
054  }
055
056  private static final Comparator<Node> DEPTH_COMPARATOR = new Comparator<Node>() {
057    public int compare(Node o1, Node o2) {
058      return o2.depth - o1.depth;
059    }
060  };
061
062  /**
063   * Depth-first search (DFS).
064   */
065  private void dfs() {
066    LinkedList<Node> stack = Lists.newLinkedList();
067    stack.add(tree.getRootNode());
068    while (!stack.isEmpty()) {
069      Node node = stack.removeLast();
070      node.startSize = list.size();
071      if (node.getEdges().isEmpty()) { // leaf
072        list.add(node.depth);
073        node.endSize = list.size();
074      } else {
075        if (!node.equals(tree.getRootNode())) { // inner node = not leaf and not root
076          innerNodes.add(node);
077        }
078        for (Edge edge : node.getEdges()) {
079          Node endNode = edge.getEndNode();
080          endNode.depth = node.depth + edge.getSpan() + 1;
081          stack.addLast(endNode);
082        }
083      }
084    }
085    // At this point all inner nodes are ordered by the time of entering, so we visit them from last to first
086    ListIterator<Node> iterator = innerNodes.listIterator(innerNodes.size());
087    while (iterator.hasPrevious()) {
088      Node node = iterator.previous();
089      int max = -1;
090      for (Edge edge : node.getEdges()) {
091        max = Math.max(edge.getEndNode().endSize, max);
092      }
093      node.endSize = max;
094    }
095  }
096
097  /**
098   * Each inner-node represents prefix of some suffixes, thus substring of text.
099   */
100  private void visitInnerNodes() {
101    for (Node node : innerNodes) {
102      if (containsOrigin(node)) {
103        report(node);
104      }
105    }
106  }
107
108  /**
109   * TODO Godin: in fact computations here are the same as in {@link #report(Node)},
110   * so maybe would be better to remove this duplication,
111   * however it should be noted that this check can't be done in {@link Collector#endOfGroup()},
112   * because it might lead to creation of unnecessary new objects
113   */
114  private boolean containsOrigin(Node node) {
115    for (int i = node.startSize; i < node.endSize; i++) {
116      int start = tree.text.length() - list.get(i);
117      int end = start + node.depth;
118      if (text.isInsideOrigin(end)) {
119        return true;
120      }
121    }
122    return false;
123  }
124
125  private void report(Node node) {
126    reporter.startOfGroup(node.endSize - node.startSize, node.depth);
127    for (int i = node.startSize; i < node.endSize; i++) {
128      int start = tree.text.length() - list.get(i);
129      int end = start + node.depth;
130      reporter.part(start, end);
131    }
132    reporter.endOfGroup();
133  }
134
135  public abstract static class Collector {
136
137    /**
138     * Invoked at the beginning of processing for current node.
139     * <p>
140     * Length - is a depth of node. And nodes are visited in descending order of depth,
141     * thus we guaranty that length will not increase between two sequential calls of this method
142     * (can be equal or less than previous value).
143     * </p>
144     *
145     * @param size number of parts in group
146     * @param length length of each part in group
147     */
148    abstract void startOfGroup(int size, int length);
149
150    /**
151     * Invoked as many times as leaves in the subtree, where current node is root.
152     *
153     * @param start start position in generalised text
154     * @param end end position in generalised text
155     */
156    abstract void part(int start, int end);
157
158    /**
159     * Invoked at the end of processing for current node.
160     */
161    abstract void endOfGroup();
162
163  }
164
165}