001/*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2012 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019 */
020package org.sonar.duplications.detector.suffixtree;
021
022import com.google.common.collect.Lists;
023import org.sonar.duplications.block.Block;
024import org.sonar.duplications.detector.ContainsInComparator;
025import org.sonar.duplications.index.CloneGroup;
026import org.sonar.duplications.index.ClonePart;
027import org.sonar.duplications.utils.SortedListsUtils;
028
029import java.util.Collections;
030import java.util.List;
031
032/**
033 * Implementation of {@link Search.Collector}, which constructs {@link CloneGroup}s.
034 */
035public class DuplicationsCollector extends Search.Collector {
036
037  private final TextSet text;
038  private final String originResourceId;
039
040  private final List<CloneGroup> filtered = Lists.newArrayList();
041
042  private int length;
043  private int count;
044  private int[][] blockNumbers;
045
046  public DuplicationsCollector(TextSet text) {
047    this.text = text;
048    this.originResourceId = text.getBlock(0).getResourceId();
049  }
050
051  /**
052   * @return current result
053   */
054  public List<CloneGroup> getResult() {
055    return filtered;
056  }
057
058  @Override
059  public void startOfGroup(int size, int length) {
060    this.blockNumbers = new int[size][2];
061    this.length = length;
062  }
063
064  /**
065   * Constructs ClonePart and saves it for future processing in {@link #endOfGroup()}.
066   *
067   * @param start number of first block from text for this part
068   * @param end number of last block from text for this part
069   * @param len number of blocks in this part
070   */
071  @Override
072  public void part(int start, int end) {
073    blockNumbers[count][0] = start;
074    blockNumbers[count][1] = end - 1;
075    count++;
076  }
077
078  /**
079   * Constructs CloneGroup and saves it.
080   */
081  @Override
082  public void endOfGroup() {
083    ClonePart origin = null;
084
085    CloneGroup.Builder builder = CloneGroup.builder().setLength(length);
086
087    List<ClonePart> parts = Lists.newArrayListWithCapacity(count);
088    for (int[] b : blockNumbers) {
089      Block firstBlock = text.getBlock(b[0]);
090      Block lastBlock = text.getBlock(b[1]);
091      ClonePart part = new ClonePart(
092          firstBlock.getResourceId(),
093          firstBlock.getIndexInFile(),
094          firstBlock.getStartLine(),
095          lastBlock.getEndLine());
096
097      // TODO Godin: maybe use FastStringComparator here ?
098      if (originResourceId.equals(part.getResourceId())) { // part from origin
099        if (origin == null) {
100          origin = part;
101          // To calculate length important to use the origin, because otherwise block may come from DB without required data
102          builder.setLengthInUnits(lastBlock.getEndUnit() - firstBlock.getStartUnit() + 1);
103        } else if (part.getUnitStart() < origin.getUnitStart()) {
104          origin = part;
105        }
106      }
107
108      parts.add(part);
109    }
110
111    Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR);
112    builder.setOrigin(origin).setParts(parts);
113
114    filter(builder.build());
115
116    reset();
117  }
118
119  /**
120   * Prepare for processing of next duplication.
121   */
122  private void reset() {
123    blockNumbers = null;
124    count = 0;
125  }
126
127  /**
128   * Saves CloneGroup, if it is not included into previously saved.
129   * <p>
130   * Current CloneGroup can not include none of CloneGroup, which were constructed before.
131   * Proof:
132   * According to an order of visiting nodes in suffix tree - length of earlier >= length of current.
133   * If length of earlier > length of current, then earlier not contained in current.
134   * If length of earlier = length of current, then earlier can be contained in current only
135   * when current has exactly the same and maybe some additional CloneParts as earlier,
136   * what in his turn will mean that two inner-nodes on same depth will satisfy condition
137   * current.startSize <= earlier.startSize <= earlier.endSize <= current.endSize , which is not possible for different inner-nodes on same depth.
138   * </p>
139   * Thus this method checks only that none of CloneGroup, which was constructed before, does not include current CloneGroup.
140   */
141  private void filter(CloneGroup current) {
142    for (CloneGroup earlier : filtered) {
143      if (containsIn(current, earlier)) {
144        return;
145      }
146    }
147    filtered.add(current);
148  }
149
150  /**
151   * Checks that second CloneGroup includes first one.
152   * <p>
153   * CloneGroup A is included in another CloneGroup B, if every part pA from A has part pB in B,
154   * which satisfy the conditions:
155   * <pre>
156   * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd)
157   * </pre>
158   * And all resourcesId from B exactly the same as all resourceId from A, which means that also every part pB from B has part pA in A,
159   * which satisfy the condition:
160   * <pre>
161   * pB.resourceId == pA.resourceId
162   * </pre>
163   * Inclusion is the partial order, thus this relation is:
164   * <ul>
165   * <li>reflexive - A in A</li>
166   * <li>transitive - (A in B) and (B in C) => (A in C)</li>
167   * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li>
168   * </ul>
169   * </p>
170   * <p>
171   * This method uses the fact that all parts already sorted by resourceId and unitStart (see {@link ContainsInComparator#CLONEPART_COMPARATOR}),
172   * so running time - O(|A|+|B|).
173   * </p>
174   */
175  private static boolean containsIn(CloneGroup first, CloneGroup second) {
176    List<ClonePart> firstParts = first.getCloneParts();
177    List<ClonePart> secondParts = second.getCloneParts();
178    // TODO Godin: according to tests seems that if first part of condition is true, then second part can not be false
179    // if this can be proved, then second part can be removed
180    return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength()))
181      && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);
182  }
183
184}