001 /* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2012 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020 package org.sonar.duplications.detector.suffixtree; 021 022 import com.google.common.collect.Lists; 023 import org.sonar.duplications.block.Block; 024 import org.sonar.duplications.detector.ContainsInComparator; 025 import org.sonar.duplications.index.CloneGroup; 026 import org.sonar.duplications.index.ClonePart; 027 import org.sonar.duplications.utils.SortedListsUtils; 028 029 import java.util.Collections; 030 import java.util.List; 031 032 /** 033 * Implementation of {@link Search.Collector}, which constructs {@link CloneGroup}s. 034 */ 035 public class DuplicationsCollector extends Search.Collector { 036 037 private final TextSet text; 038 private final String originResourceId; 039 040 private final List<CloneGroup> filtered = Lists.newArrayList(); 041 042 private int length; 043 private int count; 044 private int[][] blockNumbers; 045 046 public DuplicationsCollector(TextSet text) { 047 this.text = text; 048 this.originResourceId = text.getBlock(0).getResourceId(); 049 } 050 051 /** 052 * @return current result 053 */ 054 public List<CloneGroup> getResult() { 055 return filtered; 056 } 057 058 @Override 059 public void startOfGroup(int size, int length) { 060 this.blockNumbers = new int[size][2]; 061 this.length = length; 062 } 063 064 /** 065 * Constructs ClonePart and saves it for future processing in {@link #endOfGroup()}. 066 * 067 * @param start number of first block from text for this part 068 * @param end number of last block from text for this part 069 * @param len number of blocks in this part 070 */ 071 @Override 072 public void part(int start, int end) { 073 blockNumbers[count][0] = start; 074 blockNumbers[count][1] = end - 1; 075 count++; 076 } 077 078 /** 079 * Constructs CloneGroup and saves it. 080 */ 081 @Override 082 public void endOfGroup() { 083 ClonePart origin = null; 084 085 CloneGroup.Builder builder = CloneGroup.builder().setLength(length); 086 087 List<ClonePart> parts = Lists.newArrayListWithCapacity(count); 088 for (int[] b : blockNumbers) { 089 Block firstBlock = text.getBlock(b[0]); 090 Block lastBlock = text.getBlock(b[1]); 091 ClonePart part = new ClonePart( 092 firstBlock.getResourceId(), 093 firstBlock.getIndexInFile(), 094 firstBlock.getStartLine(), 095 lastBlock.getEndLine()); 096 097 // TODO Godin: maybe use FastStringComparator here ? 098 if (originResourceId.equals(part.getResourceId())) { // part from origin 099 if (origin == null) { 100 origin = part; 101 // To calculate length important to use the origin, because otherwise block may come from DB without required data 102 builder.setLengthInUnits(lastBlock.getEndUnit() - firstBlock.getStartUnit() + 1); 103 } else if (part.getUnitStart() < origin.getUnitStart()) { 104 origin = part; 105 } 106 } 107 108 parts.add(part); 109 } 110 111 Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR); 112 builder.setOrigin(origin).setParts(parts); 113 114 filter(builder.build()); 115 116 reset(); 117 } 118 119 /** 120 * Prepare for processing of next duplication. 121 */ 122 private void reset() { 123 blockNumbers = null; 124 count = 0; 125 } 126 127 /** 128 * Saves CloneGroup, if it is not included into previously saved. 129 * <p> 130 * Current CloneGroup can not include none of CloneGroup, which were constructed before. 131 * Proof: 132 * According to an order of visiting nodes in suffix tree - length of earlier >= length of current. 133 * If length of earlier > length of current, then earlier not contained in current. 134 * If length of earlier = length of current, then earlier can be contained in current only 135 * when current has exactly the same and maybe some additional CloneParts as earlier, 136 * what in his turn will mean that two inner-nodes on same depth will satisfy condition 137 * current.startSize <= earlier.startSize <= earlier.endSize <= current.endSize , which is not possible for different inner-nodes on same depth. 138 * </p> 139 * Thus this method checks only that none of CloneGroup, which was constructed before, does not include current CloneGroup. 140 */ 141 private void filter(CloneGroup current) { 142 for (CloneGroup earlier : filtered) { 143 if (containsIn(current, earlier)) { 144 return; 145 } 146 } 147 filtered.add(current); 148 } 149 150 /** 151 * Checks that second CloneGroup includes first one. 152 * <p> 153 * CloneGroup A is included in another CloneGroup B, if every part pA from A has part pB in B, 154 * which satisfy the conditions: 155 * <pre> 156 * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd) 157 * </pre> 158 * And all resourcesId from B exactly the same as all resourceId from A, which means that also every part pB from B has part pA in A, 159 * which satisfy the condition: 160 * <pre> 161 * pB.resourceId == pA.resourceId 162 * </pre> 163 * Inclusion is the partial order, thus this relation is: 164 * <ul> 165 * <li>reflexive - A in A</li> 166 * <li>transitive - (A in B) and (B in C) => (A in C)</li> 167 * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li> 168 * </ul> 169 * </p> 170 * <p> 171 * This method uses the fact that all parts already sorted by resourceId and unitStart (see {@link ContainsInComparator#CLONEPART_COMPARATOR}), 172 * so running time - O(|A|+|B|). 173 * </p> 174 */ 175 private static boolean containsIn(CloneGroup first, CloneGroup second) { 176 List<ClonePart> firstParts = first.getCloneParts(); 177 List<ClonePart> secondParts = second.getCloneParts(); 178 // TODO Godin: according to tests seems that if first part of condition is true, then second part can not be false 179 // if this can be proved, then second part can be removed 180 return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength())) 181 && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR); 182 } 183 184 }