001/*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2012 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019 */
020package org.sonar.duplications.detector.suffixtree;
021
022import java.util.*;
023
024import org.sonar.duplications.block.Block;
025import org.sonar.duplications.block.ByteArray;
026import org.sonar.duplications.index.CloneGroup;
027import org.sonar.duplications.index.CloneIndex;
028
029import com.google.common.collect.*;
030
031public final class SuffixTreeCloneDetectionAlgorithm {
032
033  public static List<CloneGroup> detect(CloneIndex cloneIndex, Collection<Block> fileBlocks) {
034    if (fileBlocks.isEmpty()) {
035      return Collections.EMPTY_LIST;
036    }
037    TextSet text = createTextSet(cloneIndex, fileBlocks);
038    if (text == null) {
039      return Collections.EMPTY_LIST;
040    }
041    DuplicationsCollector reporter = new DuplicationsCollector(text);
042    Search.perform(text, reporter);
043    return reporter.getResult();
044  }
045
046  private SuffixTreeCloneDetectionAlgorithm() {
047  }
048
049  private static TextSet createTextSet(CloneIndex index, Collection<Block> fileBlocks) {
050    Set<ByteArray> hashes = Sets.newHashSet();
051    for (Block fileBlock : fileBlocks) {
052      hashes.add(fileBlock.getBlockHash());
053    }
054
055    String originResourceId = fileBlocks.iterator().next().getResourceId();
056    Map<String, List<Block>> fromIndex = retrieveFromIndex(index, originResourceId, hashes);
057
058    if (fromIndex.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications
059      return null;
060    }
061
062    return createTextSet(fileBlocks, fromIndex);
063  }
064
065  private static TextSet createTextSet(Collection<Block> fileBlocks, Map<String, List<Block>> fromIndex) {
066    TextSet.Builder textSetBuilder = TextSet.builder();
067    // TODO Godin: maybe we can reduce size of tree and so memory consumption by removing non-repeatable blocks
068    List<Block> sortedFileBlocks = Lists.newArrayList(fileBlocks);
069    Collections.sort(sortedFileBlocks, BLOCK_COMPARATOR);
070    textSetBuilder.add(sortedFileBlocks);
071
072    for (List<Block> list : fromIndex.values()) {
073      Collections.sort(list, BLOCK_COMPARATOR);
074
075      int i = 0;
076      while (i < list.size()) {
077        int j = i + 1;
078        while ((j < list.size()) && (list.get(j).getIndexInFile() == list.get(j - 1).getIndexInFile() + 1)) {
079          j++;
080        }
081        textSetBuilder.add(list.subList(i, j));
082        i = j;
083      }
084    }
085
086    return textSetBuilder.build();
087  }
088
089  private static Map<String, List<Block>> retrieveFromIndex(CloneIndex index, String originResourceId, Set<ByteArray> hashes) {
090    Map<String, List<Block>> collection = Maps.newHashMap();
091    for (ByteArray hash : hashes) {
092      Collection<Block> blocks = index.getBySequenceHash(hash);
093      for (Block blockFromIndex : blocks) {
094        // Godin: skip blocks for this file if they come from index
095        String resourceId = blockFromIndex.getResourceId();
096        if (!originResourceId.equals(resourceId)) {
097          List<Block> list = collection.get(resourceId);
098          if (list == null) {
099            list = Lists.newArrayList();
100            collection.put(resourceId, list);
101          }
102          list.add(blockFromIndex);
103        }
104      }
105    }
106    return collection;
107  }
108
109  private static final Comparator<Block> BLOCK_COMPARATOR = new Comparator<Block>() {
110    public int compare(Block o1, Block o2) {
111      return o1.getIndexInFile() - o2.getIndexInFile();
112    }
113  };
114
115}