001 /* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2011 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020 package org.sonar.duplications.detector.suffixtree; 021 022 import java.util.Collections; 023 import java.util.List; 024 025 import org.sonar.duplications.block.Block; 026 import org.sonar.duplications.detector.ContainsInComparator; 027 import org.sonar.duplications.index.CloneGroup; 028 import org.sonar.duplications.index.ClonePart; 029 import org.sonar.duplications.utils.SortedListsUtils; 030 031 import com.google.common.collect.Lists; 032 033 /** 034 * Implementation of {@link Search.Collector}, which constructs {@link CloneGroup}s. 035 */ 036 public class DuplicationsCollector extends Search.Collector { 037 038 private final TextSet text; 039 private final String originResourceId; 040 041 private final List<CloneGroup> filtered = Lists.newArrayList(); 042 043 private int length; 044 private ClonePart origin; 045 private List<ClonePart> parts; 046 047 public DuplicationsCollector(TextSet text) { 048 this.text = text; 049 this.originResourceId = text.getBlock(0).getResourceId(); 050 } 051 052 /** 053 * @return current result 054 */ 055 public List<CloneGroup> getResult() { 056 return filtered; 057 } 058 059 @Override 060 public void startOfGroup(int size, int length) { 061 this.length = length; 062 this.parts = Lists.newArrayListWithCapacity(size); 063 } 064 065 /** 066 * Constructs ClonePart and saves it for future processing in {@link #endOfGroup()}. 067 * 068 * @param start number of first block from text for this part 069 * @param end number of last block from text for this part 070 * @param len number of blocks in this part 071 */ 072 @Override 073 public void part(int start, int end) { 074 Block firstBlock = text.getBlock(start); 075 Block lastBlock = text.getBlock(end - 1); 076 077 ClonePart part = new ClonePart( 078 firstBlock.getResourceId(), 079 firstBlock.getIndexInFile(), 080 firstBlock.getFirstLineNumber(), 081 lastBlock.getLastLineNumber()); 082 083 // TODO Godin: maybe use FastStringComparator here ? 084 if (originResourceId.equals(part.getResourceId())) { // part from origin 085 if (origin == null) { 086 origin = part; 087 } else if (part.getUnitStart() < origin.getUnitStart()) { 088 origin = part; 089 } 090 } 091 092 parts.add(part); 093 } 094 095 /** 096 * Constructs CloneGroup and saves it. 097 */ 098 @Override 099 public void endOfGroup() { 100 Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR); 101 CloneGroup group = new CloneGroup(length, origin, parts); 102 filter(group); 103 104 parts = null; 105 origin = null; 106 } 107 108 /** 109 * Saves CloneGroup, if it is not included into previously saved. 110 * <p> 111 * Current CloneGroup can not include none of CloneGroup, which were constructed before. 112 * Proof: 113 * According to an order of visiting nodes in suffix tree - length of earlier >= length of current. 114 * If length of earlier > length of current, then earlier not contained in current. 115 * If length of earlier = length of current, then earlier can be contained in current only 116 * when current has exactly the same and maybe some additional CloneParts as earlier, 117 * what in his turn will mean that two inner-nodes on same depth will satisfy condition 118 * current.startSize <= earlier.startSize <= earlier.endSize <= current.endSize , which is not possible for different inner-nodes on same depth. 119 * </p> 120 * Thus this method checks only that none of CloneGroup, which was constructed before, does not include current CloneGroup. 121 */ 122 private void filter(CloneGroup current) { 123 for (CloneGroup earlier : filtered) { 124 if (containsIn(current, earlier)) { 125 return; 126 } 127 } 128 filtered.add(current); 129 } 130 131 /** 132 * Checks that second CloneGroup includes first one. 133 * <p> 134 * CloneGroup A is included in another CloneGroup B, if every part pA from A has part pB in B, 135 * which satisfy the conditions: 136 * <pre> 137 * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd) 138 * </pre> 139 * And all resourcesId from B exactly the same as all resourceId from A, which means that also every part pB from B has part pA in A, 140 * which satisfy the condition: 141 * <pre> 142 * pB.resourceId == pA.resourceId 143 * </pre> 144 * Inclusion is the partial order, thus this relation is: 145 * <ul> 146 * <li>reflexive - A in A</li> 147 * <li>transitive - (A in B) and (B in C) => (A in C)</li> 148 * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li> 149 * </ul> 150 * </p> 151 * <p> 152 * This method uses the fact that all parts already sorted by resourceId and unitStart (see {@link ContainsInComparator#CLONEPART_COMPARATOR}), 153 * so running time - O(|A|+|B|). 154 * </p> 155 */ 156 private static boolean containsIn(CloneGroup first, CloneGroup second) { 157 List<ClonePart> firstParts = first.getCloneParts(); 158 List<ClonePart> secondParts = second.getCloneParts(); 159 // TODO Godin: according to tests seems that if first part of condition is true, then second part can not be false 160 // if this can be proved, then second part can be removed 161 return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength())) 162 && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR); 163 } 164 165 }