001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2012 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020 package org.sonar.duplications.detector.suffixtree;
021
022 import java.util.*;
023
024 import org.sonar.duplications.block.Block;
025 import org.sonar.duplications.block.ByteArray;
026 import org.sonar.duplications.index.CloneGroup;
027 import org.sonar.duplications.index.CloneIndex;
028
029 import com.google.common.collect.*;
030
031 public final class SuffixTreeCloneDetectionAlgorithm {
032
033 public static List<CloneGroup> detect(CloneIndex cloneIndex, Collection<Block> fileBlocks) {
034 if (fileBlocks.isEmpty()) {
035 return Collections.EMPTY_LIST;
036 }
037 TextSet text = createTextSet(cloneIndex, fileBlocks);
038 if (text == null) {
039 return Collections.EMPTY_LIST;
040 }
041 DuplicationsCollector reporter = new DuplicationsCollector(text);
042 Search.perform(text, reporter);
043 return reporter.getResult();
044 }
045
046 private SuffixTreeCloneDetectionAlgorithm() {
047 }
048
049 private static TextSet createTextSet(CloneIndex index, Collection<Block> fileBlocks) {
050 Set<ByteArray> hashes = Sets.newHashSet();
051 for (Block fileBlock : fileBlocks) {
052 hashes.add(fileBlock.getBlockHash());
053 }
054
055 String originResourceId = fileBlocks.iterator().next().getResourceId();
056 Map<String, List<Block>> fromIndex = retrieveFromIndex(index, originResourceId, hashes);
057
058 if (fromIndex.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications
059 return null;
060 }
061
062 return createTextSet(fileBlocks, fromIndex);
063 }
064
065 private static TextSet createTextSet(Collection<Block> fileBlocks, Map<String, List<Block>> fromIndex) {
066 TextSet.Builder textSetBuilder = TextSet.builder();
067 // TODO Godin: maybe we can reduce size of tree and so memory consumption by removing non-repeatable blocks
068 List<Block> sortedFileBlocks = Lists.newArrayList(fileBlocks);
069 Collections.sort(sortedFileBlocks, BLOCK_COMPARATOR);
070 textSetBuilder.add(sortedFileBlocks);
071
072 for (List<Block> list : fromIndex.values()) {
073 Collections.sort(list, BLOCK_COMPARATOR);
074
075 int i = 0;
076 while (i < list.size()) {
077 int j = i + 1;
078 while ((j < list.size()) && (list.get(j).getIndexInFile() == list.get(j - 1).getIndexInFile() + 1)) {
079 j++;
080 }
081 textSetBuilder.add(list.subList(i, j));
082 i = j;
083 }
084 }
085
086 return textSetBuilder.build();
087 }
088
089 private static Map<String, List<Block>> retrieveFromIndex(CloneIndex index, String originResourceId, Set<ByteArray> hashes) {
090 Map<String, List<Block>> collection = Maps.newHashMap();
091 for (ByteArray hash : hashes) {
092 Collection<Block> blocks = index.getBySequenceHash(hash);
093 for (Block blockFromIndex : blocks) {
094 // Godin: skip blocks for this file if they come from index
095 String resourceId = blockFromIndex.getResourceId();
096 if (!originResourceId.equals(resourceId)) {
097 List<Block> list = collection.get(resourceId);
098 if (list == null) {
099 list = Lists.newArrayList();
100 collection.put(resourceId, list);
101 }
102 list.add(blockFromIndex);
103 }
104 }
105 }
106 return collection;
107 }
108
109 private static final Comparator<Block> BLOCK_COMPARATOR = new Comparator<Block>() {
110 public int compare(Block o1, Block o2) {
111 return o1.getIndexInFile() - o2.getIndexInFile();
112 }
113 };
114
115 }