001    /*
002     * Sonar, open source software quality management tool.
003     * Copyright (C) 2008-2011 SonarSource
004     * mailto:contact AT sonarsource DOT com
005     *
006     * Sonar is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * Sonar is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with Sonar; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019     */
020    package org.sonar.duplications.detector.suffixtree;
021    
022    import java.util.Collections;
023    import java.util.List;
024    
025    import org.sonar.duplications.block.Block;
026    import org.sonar.duplications.detector.ContainsInComparator;
027    import org.sonar.duplications.index.CloneGroup;
028    import org.sonar.duplications.index.ClonePart;
029    import org.sonar.duplications.utils.SortedListsUtils;
030    
031    import com.google.common.collect.Lists;
032    
033    /**
034     * Implementation of {@link Search.Collector}, which constructs {@link CloneGroup}s.
035     */
036    public class DuplicationsCollector extends Search.Collector {
037    
038      private final TextSet text;
039      private final String originResourceId;
040    
041      private final List<CloneGroup> filtered = Lists.newArrayList();
042    
043      private int length;
044      private ClonePart origin;
045      private List<ClonePart> parts;
046    
047      public DuplicationsCollector(TextSet text) {
048        this.text = text;
049        this.originResourceId = text.getBlock(0).getResourceId();
050      }
051    
052      /**
053       * @return current result
054       */
055      public List<CloneGroup> getResult() {
056        return filtered;
057      }
058    
059      @Override
060      public void startOfGroup(int size, int length) {
061        this.length = length;
062        this.parts = Lists.newArrayListWithCapacity(size);
063      }
064    
065      /**
066       * Constructs ClonePart and saves it for future processing in {@link #endOfGroup()}.
067       * 
068       * @param start number of first block from text for this part
069       * @param end number of last block from text for this part
070       * @param len number of blocks in this part
071       */
072      @Override
073      public void part(int start, int end) {
074        Block firstBlock = text.getBlock(start);
075        Block lastBlock = text.getBlock(end - 1);
076    
077        ClonePart part = new ClonePart(
078            firstBlock.getResourceId(),
079            firstBlock.getIndexInFile(),
080            firstBlock.getFirstLineNumber(),
081            lastBlock.getLastLineNumber());
082    
083        // TODO Godin: maybe use FastStringComparator here ?
084        if (originResourceId.equals(part.getResourceId())) { // part from origin
085          if (origin == null) {
086            origin = part;
087          } else if (part.getUnitStart() < origin.getUnitStart()) {
088            origin = part;
089          }
090        }
091    
092        parts.add(part);
093      }
094    
095      /**
096       * Constructs CloneGroup and saves it.
097       */
098      @Override
099      public void endOfGroup() {
100        Collections.sort(parts, ContainsInComparator.CLONEPART_COMPARATOR);
101        CloneGroup group = new CloneGroup(length, origin, parts);
102        filter(group);
103    
104        parts = null;
105        origin = null;
106      }
107    
108      /**
109       * Saves CloneGroup, if it is not included into previously saved.
110       * <p>
111       * Current CloneGroup can not include none of CloneGroup, which were constructed before.
112       * Proof:
113       * According to an order of visiting nodes in suffix tree - length of earlier >= length of current.
114       * If length of earlier > length of current, then earlier not contained in current.
115       * If length of earlier = length of current, then earlier can be contained in current only
116       * when current has exactly the same and maybe some additional CloneParts as earlier,
117       * what in his turn will mean that two inner-nodes on same depth will satisfy condition
118       * current.startSize <= earlier.startSize <= earlier.endSize <= current.endSize , which is not possible for different inner-nodes on same depth.
119       * </p>
120       * Thus this method checks only that none of CloneGroup, which was constructed before, does not include current CloneGroup.
121       */
122      private void filter(CloneGroup current) {
123        for (CloneGroup earlier : filtered) {
124          if (containsIn(current, earlier)) {
125            return;
126          }
127        }
128        filtered.add(current);
129      }
130    
131      /**
132       * Checks that second CloneGroup includes first one.
133       * <p>
134       * CloneGroup A is included in another CloneGroup B, if every part pA from A has part pB in B,
135       * which satisfy the conditions:
136       * <pre>
137       * (pA.resourceId == pB.resourceId) and (pB.unitStart <= pA.unitStart) and (pA.unitEnd <= pB.unitEnd)
138       * </pre>
139       * And all resourcesId from B exactly the same as all resourceId from A, which means that also every part pB from B has part pA in A,
140       * which satisfy the condition:
141       * <pre>
142       * pB.resourceId == pA.resourceId
143       * </pre>
144       * Inclusion is the partial order, thus this relation is:
145       * <ul>
146       * <li>reflexive - A in A</li>
147       * <li>transitive - (A in B) and (B in C) => (A in C)</li>
148       * <li>antisymmetric - (A in B) and (B in A) <=> (A = B)</li>
149       * </ul>
150       * </p>
151       * <p>
152       * This method uses the fact that all parts already sorted by resourceId and unitStart (see {@link ContainsInComparator#CLONEPART_COMPARATOR}),
153       * so running time - O(|A|+|B|).
154       * </p>
155       */
156      private static boolean containsIn(CloneGroup first, CloneGroup second) {
157        List<ClonePart> firstParts = first.getCloneParts();
158        List<ClonePart> secondParts = second.getCloneParts();
159        // TODO Godin: according to tests seems that if first part of condition is true, then second part can not be false
160        // if this can be proved, then second part can be removed
161        return SortedListsUtils.contains(secondParts, firstParts, new ContainsInComparator(second.getCloneUnitLength(), first.getCloneUnitLength()))
162            && SortedListsUtils.contains(firstParts, secondParts, ContainsInComparator.RESOURCE_ID_COMPARATOR);
163      }
164    
165    }