001    /*
002     * Sonar, open source software quality management tool.
003     * Copyright (C) 2008-2012 SonarSource
004     * mailto:contact AT sonarsource DOT com
005     *
006     * Sonar is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * Sonar is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with Sonar; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019     */
020    package org.sonar.duplications.detector.suffixtree;
021    
022    import java.util.*;
023    
024    import org.sonar.duplications.block.Block;
025    import org.sonar.duplications.block.ByteArray;
026    import org.sonar.duplications.index.CloneGroup;
027    import org.sonar.duplications.index.CloneIndex;
028    
029    import com.google.common.collect.*;
030    
031    public final class SuffixTreeCloneDetectionAlgorithm {
032    
033      public static List<CloneGroup> detect(CloneIndex cloneIndex, Collection<Block> fileBlocks) {
034        if (fileBlocks.isEmpty()) {
035          return Collections.EMPTY_LIST;
036        }
037        TextSet text = createTextSet(cloneIndex, fileBlocks);
038        if (text == null) {
039          return Collections.EMPTY_LIST;
040        }
041        DuplicationsCollector reporter = new DuplicationsCollector(text);
042        Search.perform(text, reporter);
043        return reporter.getResult();
044      }
045    
046      private SuffixTreeCloneDetectionAlgorithm() {
047      }
048    
049      private static TextSet createTextSet(CloneIndex index, Collection<Block> fileBlocks) {
050        Set<ByteArray> hashes = Sets.newHashSet();
051        for (Block fileBlock : fileBlocks) {
052          hashes.add(fileBlock.getBlockHash());
053        }
054    
055        String originResourceId = fileBlocks.iterator().next().getResourceId();
056        Map<String, List<Block>> fromIndex = retrieveFromIndex(index, originResourceId, hashes);
057    
058        if (fromIndex.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications
059          return null;
060        }
061    
062        return createTextSet(fileBlocks, fromIndex);
063      }
064    
065      private static TextSet createTextSet(Collection<Block> fileBlocks, Map<String, List<Block>> fromIndex) {
066        TextSet.Builder textSetBuilder = TextSet.builder();
067        // TODO Godin: maybe we can reduce size of tree and so memory consumption by removing non-repeatable blocks
068        List<Block> sortedFileBlocks = Lists.newArrayList(fileBlocks);
069        Collections.sort(sortedFileBlocks, BLOCK_COMPARATOR);
070        textSetBuilder.add(sortedFileBlocks);
071    
072        for (List<Block> list : fromIndex.values()) {
073          Collections.sort(list, BLOCK_COMPARATOR);
074    
075          int i = 0;
076          while (i < list.size()) {
077            int j = i + 1;
078            while ((j < list.size()) && (list.get(j).getIndexInFile() == list.get(j - 1).getIndexInFile() + 1)) {
079              j++;
080            }
081            textSetBuilder.add(list.subList(i, j));
082            i = j;
083          }
084        }
085    
086        return textSetBuilder.build();
087      }
088    
089      private static Map<String, List<Block>> retrieveFromIndex(CloneIndex index, String originResourceId, Set<ByteArray> hashes) {
090        Map<String, List<Block>> collection = Maps.newHashMap();
091        for (ByteArray hash : hashes) {
092          Collection<Block> blocks = index.getBySequenceHash(hash);
093          for (Block blockFromIndex : blocks) {
094            // Godin: skip blocks for this file if they come from index
095            String resourceId = blockFromIndex.getResourceId();
096            if (!originResourceId.equals(resourceId)) {
097              List<Block> list = collection.get(resourceId);
098              if (list == null) {
099                list = Lists.newArrayList();
100                collection.put(resourceId, list);
101              }
102              list.add(blockFromIndex);
103            }
104          }
105        }
106        return collection;
107      }
108    
109      private static final Comparator<Block> BLOCK_COMPARATOR = new Comparator<Block>() {
110        public int compare(Block o1, Block o2) {
111          return o1.getIndexInFile() - o2.getIndexInFile();
112        }
113      };
114    
115    }