001    /*
002     * Sonar, open source software quality management tool.
003     * Copyright (C) 2008-2012 SonarSource
004     * mailto:contact AT sonarsource DOT com
005     *
006     * Sonar is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * Sonar is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with Sonar; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019     */
020    package org.sonar.duplications.detector.suffixtree;
021    
022    import com.google.common.collect.Lists;
023    import org.sonar.duplications.block.Block;
024    import org.sonar.duplications.detector.ContainsInComparator;
025    import org.sonar.duplications.index.CloneGroup;
026    import org.sonar.duplications.index.ClonePart;
027    import org.sonar.duplications.utils.SortedListsUtils;
028    
029    import java.util.Collections;
030    import java.util.List;
031    
032    /**
033     * Implementation of {@link Search.Collector}, which constructs {@link CloneGroup}s.
034     */
035    public class DuplicationsCollector extends Search.Collector {
036    
037      private final TextSet text;
038      private final String originResourceId;
039    
040      private final List<CloneGroup> filtered = Lists.newArrayList();
041    
042      private int length;
043      private int count;
044      private int[][] blockNumbers;
045    
046      public DuplicationsCollector(TextSet text) {
047        this.text = text;
048        this.originResourceId = text.getBlock(0).getResourceId();
049      }
050    
051      /**
052       * @return current result
053       */
054      public List<CloneGroup> getResult() {
055        return filtered;
056      }
057    
058      @Override
059      public void startOfGroup(int size, int length) {
060        this.blockNumbers = new int[size][2];
061        this.length = length;
062      }
063    
064      /**
065       * Constructs ClonePart and saves it for future processing in {@link #endOfGroup()}.
066       *
067       * @param start number of first block from text for this part
068       * @param end number of last block from text for this part
069       * @param len number of blocks in this part
070       */
071      @Override
072      public void part(int start, int end) {
073        blockNumbers[count][0] = start;
074        blockNumbers[count][1] = end - 1;
075        count++;
076      }
077    
078      /**
079       * Constructs CloneGroup and saves it.
080       */
081      @Override
082      public void endOfGroup() {
083        ClonePart origin = null;
084    
085        CloneGroup.Builder builder = CloneGroup.builder().setLength(length);
086    
087        List<ClonePart> parts = Lists.newArrayListWithCapacity(count);
088        for (int[] b : blockNumbers) {
089          Block firstBlock = text.getBlock(b[0]);
090          Block lastBlock = text.getBlock(b[1]);
091          ClonePart part = new ClonePart(
092              firstBlock.getResourceId(),
093              firstBlock.getIndexInFile(),
094              firstBlock.getStartLine(),
095              lastBlock.getEndLine());
096    
097          // TODO Godin: maybe use FastStringComparator here ?
098          if (originResourceId.equals(part.getResourceId())) { // part from origin
099            if (origin == null) {
100              origin = part;
101              // To calculate length important to use the origin, because otherwise block may come from DB without required data
102              builder.setLengthInUnits(lastBlock.getEndUnit() - firstBlock.getStartUnit() + 1);
103            } else if (part.getUnitStart() < origin.getUnitStart()) {
104              origin = part;
105            }
106          }
107    
108          parts.add(part);
109        }
110    
111        Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR);
112        builder.setOrigin(origin).setParts(parts);
113    
114        filter(builder.build());
115    
116        reset();
117      }
118    
119      /**
120       * Prepare for processing of next duplication.
121       */
122      private void reset() {
123        blockNumbers = null;
124        count = 0;
125      }
126    
127      /**
128       * Saves CloneGroup, if it is not included into previously saved.
129       * <p>
130       * Current CloneGroup can not include none of CloneGroup, which were constructed before.
131       * Proof:
132       * According to an order of visiting nodes in suffix tree - length of earlier >= length of current.
133       * If length of earlier > length of current, then earlier not contained in current.
134       * If length of earlier = length of current, then earlier can be contained in current only
135       * when current has exactly the same and maybe some additional CloneParts as earlier,
136       * what in his turn will mean that two inner-nodes on same depth will satisfy condition
137       * current.startSize <= earlier.startSize <= earlier.endSize <= current.endSize , which is not possible for different inner-nodes on same depth.
138       * </p>
139       * Thus this method checks only that none of CloneGroup, which was constructed before, does not include current CloneGroup.
140       */
141      private void filter(CloneGroup current) {
142        for (CloneGroup earlier : filtered) {
143          if (containsIn(current, earlier)) {
144            return;
145          }
146        }
147        filtered.add(current);
148      }
149    
150      /**
151       * Checks that second CloneGroup includes first one.
152       * <p>
153       * CloneGroup A is included in another CloneGroup B, if every part pA from A has part pB in B,
154       * which satisfy the conditions:
155       * <pre>
156       * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd)
157       * </pre>
158       * And all resourcesId from B exactly the same as all resourceId from A, which means that also every part pB from B has part pA in A,
159       * which satisfy the condition:
160       * <pre>
161       * pB.resourceId == pA.resourceId
162       * </pre>
163       * Inclusion is the partial order, thus this relation is:
164       * <ul>
165       * <li>reflexive - A in A</li>
166       * <li>transitive - (A in B) and (B in C) => (A in C)</li>
167       * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li>
168       * </ul>
169       * </p>
170       * <p>
171       * This method uses the fact that all parts already sorted by resourceId and unitStart (see {@link ContainsInComparator#CLONEPART_COMPARATOR}),
172       * so running time - O(|A|+|B|).
173       * </p>
174       */
175      private static boolean containsIn(CloneGroup first, CloneGroup second) {
176        List<ClonePart> firstParts = first.getCloneParts();
177        List<ClonePart> secondParts = second.getCloneParts();
178        // TODO Godin: according to tests seems that if first part of condition is true, then second part can not be false
179        // if this can be proved, then second part can be removed
180        return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength()))
181          && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);
182      }
183    
184    }