001 /* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2012 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020 package org.sonar.duplications.detector.suffixtree; 021 022 import com.google.common.collect.Lists; 023 024 import java.util.*; 025 026 public final class Search { 027 028 private final SuffixTree tree; 029 private final TextSet text; 030 private final Collector reporter; 031 032 private final List<Integer> list = Lists.newArrayList(); 033 private final List<Node> innerNodes = Lists.newArrayList(); 034 035 public static void perform(TextSet text, Collector reporter) { 036 new Search(SuffixTree.create(text), text, reporter).compute(); 037 } 038 039 private Search(SuffixTree tree, TextSet text, Collector reporter) { 040 this.tree = tree; 041 this.text = text; 042 this.reporter = reporter; 043 } 044 045 private void compute() { 046 // O(N) 047 dfs(); 048 049 // O(N * log(N)) 050 Collections.sort(innerNodes, DEPTH_COMPARATOR); 051 052 // O(N) 053 visitInnerNodes(); 054 } 055 056 private static final Comparator<Node> DEPTH_COMPARATOR = new Comparator<Node>() { 057 public int compare(Node o1, Node o2) { 058 return o2.depth - o1.depth; 059 } 060 }; 061 062 /** 063 * Depth-first search (DFS). 064 */ 065 private void dfs() { 066 LinkedList<Node> stack = Lists.newLinkedList(); 067 stack.add(tree.getRootNode()); 068 while (!stack.isEmpty()) { 069 Node node = stack.removeLast(); 070 node.startSize = list.size(); 071 if (node.getEdges().isEmpty()) { // leaf 072 list.add(node.depth); 073 node.endSize = list.size(); 074 } else { 075 if (!node.equals(tree.getRootNode())) { // inner node = not leaf and not root 076 innerNodes.add(node); 077 } 078 for (Edge edge : node.getEdges()) { 079 Node endNode = edge.getEndNode(); 080 endNode.depth = node.depth + edge.getSpan() + 1; 081 stack.addLast(endNode); 082 } 083 } 084 } 085 // At this point all inner nodes are ordered by the time of entering, so we visit them from last to first 086 ListIterator<Node> iterator = innerNodes.listIterator(innerNodes.size()); 087 while (iterator.hasPrevious()) { 088 Node node = iterator.previous(); 089 int max = -1; 090 for (Edge edge : node.getEdges()) { 091 max = Math.max(edge.getEndNode().endSize, max); 092 } 093 node.endSize = max; 094 } 095 } 096 097 /** 098 * Each inner-node represents prefix of some suffixes, thus substring of text. 099 */ 100 private void visitInnerNodes() { 101 for (Node node : innerNodes) { 102 if (containsOrigin(node)) { 103 report(node); 104 } 105 } 106 } 107 108 /** 109 * TODO Godin: in fact computations here are the same as in {@link #report(Node)}, 110 * so maybe would be better to remove this duplication, 111 * however it should be noted that this check can't be done in {@link Collector#endOfGroup()}, 112 * because it might lead to creation of unnecessary new objects 113 */ 114 private boolean containsOrigin(Node node) { 115 for (int i = node.startSize; i < node.endSize; i++) { 116 int start = tree.text.length() - list.get(i); 117 int end = start + node.depth; 118 if (text.isInsideOrigin(end)) { 119 return true; 120 } 121 } 122 return false; 123 } 124 125 private void report(Node node) { 126 reporter.startOfGroup(node.endSize - node.startSize, node.depth); 127 for (int i = node.startSize; i < node.endSize; i++) { 128 int start = tree.text.length() - list.get(i); 129 int end = start + node.depth; 130 reporter.part(start, end); 131 } 132 reporter.endOfGroup(); 133 } 134 135 public abstract static class Collector { 136 137 /** 138 * Invoked at the beginning of processing for current node. 139 * <p> 140 * Length - is a depth of node. And nodes are visited in descending order of depth, 141 * thus we guaranty that length will not increase between two sequential calls of this method 142 * (can be equal or less than previous value). 143 * </p> 144 * 145 * @param size number of parts in group 146 * @param length length of each part in group 147 */ 148 abstract void startOfGroup(int size, int length); 149 150 /** 151 * Invoked as many times as leaves in the subtree, where current node is root. 152 * 153 * @param start start position in generalised text 154 * @param end end position in generalised text 155 */ 156 abstract void part(int start, int end); 157 158 /** 159 * Invoked at the end of processing for current node. 160 */ 161 abstract void endOfGroup(); 162 163 } 164 165 }