001    /*
002     * Sonar, open source software quality management tool.
003     * Copyright (C) 2008-2012 SonarSource
004     * mailto:contact AT sonarsource DOT com
005     *
006     * Sonar is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * Sonar is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with Sonar; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019     */
020    package org.sonar.duplications.detector.suffixtree;
021    
022    import com.google.common.collect.Lists;
023    
024    import java.util.*;
025    
026    public final class Search {
027    
028      private final SuffixTree tree;
029      private final TextSet text;
030      private final Collector reporter;
031    
032      private final List<Integer> list = Lists.newArrayList();
033      private final List<Node> innerNodes = Lists.newArrayList();
034    
035      public static void perform(TextSet text, Collector reporter) {
036        new Search(SuffixTree.create(text), text, reporter).compute();
037      }
038    
039      private Search(SuffixTree tree, TextSet text, Collector reporter) {
040        this.tree = tree;
041        this.text = text;
042        this.reporter = reporter;
043      }
044    
045      private void compute() {
046        // O(N)
047        dfs();
048    
049        // O(N * log(N))
050        Collections.sort(innerNodes, DEPTH_COMPARATOR);
051    
052        // O(N)
053        visitInnerNodes();
054      }
055    
056      private static final Comparator<Node> DEPTH_COMPARATOR = new Comparator<Node>() {
057        public int compare(Node o1, Node o2) {
058          return o2.depth - o1.depth;
059        }
060      };
061    
062      /**
063       * Depth-first search (DFS).
064       */
065      private void dfs() {
066        LinkedList<Node> stack = Lists.newLinkedList();
067        stack.add(tree.getRootNode());
068        while (!stack.isEmpty()) {
069          Node node = stack.removeLast();
070          node.startSize = list.size();
071          if (node.getEdges().isEmpty()) { // leaf
072            list.add(node.depth);
073            node.endSize = list.size();
074          } else {
075            if (!node.equals(tree.getRootNode())) { // inner node = not leaf and not root
076              innerNodes.add(node);
077            }
078            for (Edge edge : node.getEdges()) {
079              Node endNode = edge.getEndNode();
080              endNode.depth = node.depth + edge.getSpan() + 1;
081              stack.addLast(endNode);
082            }
083          }
084        }
085        // At this point all inner nodes are ordered by the time of entering, so we visit them from last to first
086        ListIterator<Node> iterator = innerNodes.listIterator(innerNodes.size());
087        while (iterator.hasPrevious()) {
088          Node node = iterator.previous();
089          int max = -1;
090          for (Edge edge : node.getEdges()) {
091            max = Math.max(edge.getEndNode().endSize, max);
092          }
093          node.endSize = max;
094        }
095      }
096    
097      /**
098       * Each inner-node represents prefix of some suffixes, thus substring of text.
099       */
100      private void visitInnerNodes() {
101        for (Node node : innerNodes) {
102          if (containsOrigin(node)) {
103            report(node);
104          }
105        }
106      }
107    
108      /**
109       * TODO Godin: in fact computations here are the same as in {@link #report(Node)},
110       * so maybe would be better to remove this duplication,
111       * however it should be noted that this check can't be done in {@link Collector#endOfGroup()},
112       * because it might lead to creation of unnecessary new objects
113       */
114      private boolean containsOrigin(Node node) {
115        for (int i = node.startSize; i < node.endSize; i++) {
116          int start = tree.text.length() - list.get(i);
117          int end = start + node.depth;
118          if (text.isInsideOrigin(end)) {
119            return true;
120          }
121        }
122        return false;
123      }
124    
125      private void report(Node node) {
126        reporter.startOfGroup(node.endSize - node.startSize, node.depth);
127        for (int i = node.startSize; i < node.endSize; i++) {
128          int start = tree.text.length() - list.get(i);
129          int end = start + node.depth;
130          reporter.part(start, end);
131        }
132        reporter.endOfGroup();
133      }
134    
135      public abstract static class Collector {
136    
137        /**
138         * Invoked at the beginning of processing for current node.
139         * <p>
140         * Length - is a depth of node. And nodes are visited in descending order of depth,
141         * thus we guaranty that length will not increase between two sequential calls of this method
142         * (can be equal or less than previous value).
143         * </p>
144         *
145         * @param size number of parts in group
146         * @param length length of each part in group
147         */
148        abstract void startOfGroup(int size, int length);
149    
150        /**
151         * Invoked as many times as leaves in the subtree, where current node is root.
152         *
153         * @param start start position in generalised text
154         * @param end end position in generalised text
155         */
156        abstract void part(int start, int end);
157    
158        /**
159         * Invoked at the end of processing for current node.
160         */
161        abstract void endOfGroup();
162    
163      }
164    
165    }