001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2012 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020 package org.sonar.duplications.detector.suffixtree;
021
022 import com.google.common.collect.Lists;
023 import org.sonar.duplications.block.Block;
024 import org.sonar.duplications.detector.ContainsInComparator;
025 import org.sonar.duplications.index.CloneGroup;
026 import org.sonar.duplications.index.ClonePart;
027 import org.sonar.duplications.utils.SortedListsUtils;
028
029 import java.util.Collections;
030 import java.util.List;
031
032 /**
033 * Implementation of {@link Search.Collector}, which constructs {@link CloneGroup}s.
034 */
035 public class DuplicationsCollector extends Search.Collector {
036
037 private final TextSet text;
038 private final String originResourceId;
039
040 private final List<CloneGroup> filtered = Lists.newArrayList();
041
042 private int length;
043 private int count;
044 private int[][] blockNumbers;
045
046 public DuplicationsCollector(TextSet text) {
047 this.text = text;
048 this.originResourceId = text.getBlock(0).getResourceId();
049 }
050
051 /**
052 * @return current result
053 */
054 public List<CloneGroup> getResult() {
055 return filtered;
056 }
057
058 @Override
059 public void startOfGroup(int size, int length) {
060 this.blockNumbers = new int[size][2];
061 this.length = length;
062 }
063
064 /**
065 * Constructs ClonePart and saves it for future processing in {@link #endOfGroup()}.
066 *
067 * @param start number of first block from text for this part
068 * @param end number of last block from text for this part
069 * @param len number of blocks in this part
070 */
071 @Override
072 public void part(int start, int end) {
073 blockNumbers[count][0] = start;
074 blockNumbers[count][1] = end - 1;
075 count++;
076 }
077
078 /**
079 * Constructs CloneGroup and saves it.
080 */
081 @Override
082 public void endOfGroup() {
083 ClonePart origin = null;
084
085 CloneGroup.Builder builder = CloneGroup.builder().setLength(length);
086
087 List<ClonePart> parts = Lists.newArrayListWithCapacity(count);
088 for (int[] b : blockNumbers) {
089 Block firstBlock = text.getBlock(b[0]);
090 Block lastBlock = text.getBlock(b[1]);
091 ClonePart part = new ClonePart(
092 firstBlock.getResourceId(),
093 firstBlock.getIndexInFile(),
094 firstBlock.getStartLine(),
095 lastBlock.getEndLine());
096
097 // TODO Godin: maybe use FastStringComparator here ?
098 if (originResourceId.equals(part.getResourceId())) { // part from origin
099 if (origin == null) {
100 origin = part;
101 // To calculate length important to use the origin, because otherwise block may come from DB without required data
102 builder.setLengthInUnits(lastBlock.getEndUnit() - firstBlock.getStartUnit() + 1);
103 } else if (part.getUnitStart() < origin.getUnitStart()) {
104 origin = part;
105 }
106 }
107
108 parts.add(part);
109 }
110
111 Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR);
112 builder.setOrigin(origin).setParts(parts);
113
114 filter(builder.build());
115
116 reset();
117 }
118
119 /**
120 * Prepare for processing of next duplication.
121 */
122 private void reset() {
123 blockNumbers = null;
124 count = 0;
125 }
126
127 /**
128 * Saves CloneGroup, if it is not included into previously saved.
129 * <p>
130 * Current CloneGroup can not include none of CloneGroup, which were constructed before.
131 * Proof:
132 * According to an order of visiting nodes in suffix tree - length of earlier >= length of current.
133 * If length of earlier > length of current, then earlier not contained in current.
134 * If length of earlier = length of current, then earlier can be contained in current only
135 * when current has exactly the same and maybe some additional CloneParts as earlier,
136 * what in his turn will mean that two inner-nodes on same depth will satisfy condition
137 * current.startSize <= earlier.startSize <= earlier.endSize <= current.endSize , which is not possible for different inner-nodes on same depth.
138 * </p>
139 * Thus this method checks only that none of CloneGroup, which was constructed before, does not include current CloneGroup.
140 */
141 private void filter(CloneGroup current) {
142 for (CloneGroup earlier : filtered) {
143 if (containsIn(current, earlier)) {
144 return;
145 }
146 }
147 filtered.add(current);
148 }
149
150 /**
151 * Checks that second CloneGroup includes first one.
152 * <p>
153 * CloneGroup A is included in another CloneGroup B, if every part pA from A has part pB in B,
154 * which satisfy the conditions:
155 * <pre>
156 * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd)
157 * </pre>
158 * And all resourcesId from B exactly the same as all resourceId from A, which means that also every part pB from B has part pA in A,
159 * which satisfy the condition:
160 * <pre>
161 * pB.resourceId == pA.resourceId
162 * </pre>
163 * Inclusion is the partial order, thus this relation is:
164 * <ul>
165 * <li>reflexive - A in A</li>
166 * <li>transitive - (A in B) and (B in C) => (A in C)</li>
167 * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li>
168 * </ul>
169 * </p>
170 * <p>
171 * This method uses the fact that all parts already sorted by resourceId and unitStart (see {@link ContainsInComparator#CLONEPART_COMPARATOR}),
172 * so running time - O(|A|+|B|).
173 * </p>
174 */
175 private static boolean containsIn(CloneGroup first, CloneGroup second) {
176 List<ClonePart> firstParts = first.getCloneParts();
177 List<ClonePart> secondParts = second.getCloneParts();
178 // TODO Godin: according to tests seems that if first part of condition is true, then second part can not be false
179 // if this can be proved, then second part can be removed
180 return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength()))
181 && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);
182 }
183
184 }