001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2011 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020 package org.sonar.duplications.detector.suffixtree;
021
022 import java.util.Collections;
023 import java.util.List;
024
025 import org.sonar.duplications.block.Block;
026 import org.sonar.duplications.detector.ContainsInComparator;
027 import org.sonar.duplications.index.CloneGroup;
028 import org.sonar.duplications.index.ClonePart;
029 import org.sonar.duplications.utils.SortedListsUtils;
030
031 import com.google.common.collect.Lists;
032
033 /**
034 * Implementation of {@link Search.Collector}, which constructs {@link CloneGroup}s.
035 */
036 public class DuplicationsCollector extends Search.Collector {
037
038 private final TextSet text;
039 private final String originResourceId;
040
041 private final List<CloneGroup> filtered = Lists.newArrayList();
042
043 private int length;
044 private ClonePart origin;
045 private List<ClonePart> parts;
046
047 public DuplicationsCollector(TextSet text) {
048 this.text = text;
049 this.originResourceId = text.getBlock(0).getResourceId();
050 }
051
052 /**
053 * @return current result
054 */
055 public List<CloneGroup> getResult() {
056 return filtered;
057 }
058
059 @Override
060 public void startOfGroup(int size, int length) {
061 this.length = length;
062 this.parts = Lists.newArrayListWithCapacity(size);
063 }
064
065 /**
066 * Constructs ClonePart and saves it for future processing in {@link #endOfGroup()}.
067 *
068 * @param start number of first block from text for this part
069 * @param end number of last block from text for this part
070 * @param len number of blocks in this part
071 */
072 @Override
073 public void part(int start, int end) {
074 Block firstBlock = text.getBlock(start);
075 Block lastBlock = text.getBlock(end - 1);
076
077 ClonePart part = new ClonePart(
078 firstBlock.getResourceId(),
079 firstBlock.getIndexInFile(),
080 firstBlock.getFirstLineNumber(),
081 lastBlock.getLastLineNumber());
082
083 // TODO Godin: maybe use FastStringComparator here ?
084 if (originResourceId.equals(part.getResourceId())) { // part from origin
085 if (origin == null) {
086 origin = part;
087 } else if (part.getUnitStart() < origin.getUnitStart()) {
088 origin = part;
089 }
090 }
091
092 parts.add(part);
093 }
094
095 /**
096 * Constructs CloneGroup and saves it.
097 */
098 @Override
099 public void endOfGroup() {
100 Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR);
101 CloneGroup group = new CloneGroup(length, origin, parts);
102 filter(group);
103
104 parts = null;
105 origin = null;
106 }
107
108 /**
109 * Saves CloneGroup, if it is not included into previously saved.
110 * <p>
111 * Current CloneGroup can not include none of CloneGroup, which were constructed before.
112 * Proof:
113 * According to an order of visiting nodes in suffix tree - length of earlier >= length of current.
114 * If length of earlier > length of current, then earlier not contained in current.
115 * If length of earlier = length of current, then earlier can be contained in current only
116 * when current has exactly the same and maybe some additional CloneParts as earlier,
117 * what in his turn will mean that two inner-nodes on same depth will satisfy condition
118 * current.startSize <= earlier.startSize <= earlier.endSize <= current.endSize , which is not possible for different inner-nodes on same depth.
119 * </p>
120 * Thus this method checks only that none of CloneGroup, which was constructed before, does not include current CloneGroup.
121 */
122 private void filter(CloneGroup current) {
123 for (CloneGroup earlier : filtered) {
124 if (containsIn(current, earlier)) {
125 return;
126 }
127 }
128 filtered.add(current);
129 }
130
131 /**
132 * Checks that second CloneGroup includes first one.
133 * <p>
134 * CloneGroup A is included in another CloneGroup B, if every part pA from A has part pB in B,
135 * which satisfy the conditions:
136 * <pre>
137 * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd)
138 * </pre>
139 * And all resourcesId from B exactly the same as all resourceId from A, which means that also every part pB from B has part pA in A,
140 * which satisfy the condition:
141 * <pre>
142 * pB.resourceId == pA.resourceId
143 * </pre>
144 * Inclusion is the partial order, thus this relation is:
145 * <ul>
146 * <li>reflexive - A in A</li>
147 * <li>transitive - (A in B) and (B in C) => (A in C)</li>
148 * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li>
149 * </ul>
150 * </p>
151 * <p>
152 * This method uses the fact that all parts already sorted by resourceId and unitStart (see {@link ContainsInComparator#CLONEPART_COMPARATOR}),
153 * so running time - O(|A|+|B|).
154 * </p>
155 */
156 private static boolean containsIn(CloneGroup first, CloneGroup second) {
157 List<ClonePart> firstParts = first.getCloneParts();
158 List<ClonePart> secondParts = second.getCloneParts();
159 // TODO Godin: according to tests seems that if first part of condition is true, then second part can not be false
160 // if this can be proved, then second part can be removed
161 return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength()))
162 && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);
163 }
164
165 }