001/* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2012 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020package org.sonar.duplications.detector.suffixtree; 021 022import java.util.*; 023 024import org.sonar.duplications.block.Block; 025import org.sonar.duplications.block.ByteArray; 026import org.sonar.duplications.index.CloneGroup; 027import org.sonar.duplications.index.CloneIndex; 028 029import com.google.common.collect.*; 030 031public final class SuffixTreeCloneDetectionAlgorithm { 032 033 public static List<CloneGroup> detect(CloneIndex cloneIndex, Collection<Block> fileBlocks) { 034 if (fileBlocks.isEmpty()) { 035 return Collections.EMPTY_LIST; 036 } 037 TextSet text = createTextSet(cloneIndex, fileBlocks); 038 if (text == null) { 039 return Collections.EMPTY_LIST; 040 } 041 DuplicationsCollector reporter = new DuplicationsCollector(text); 042 Search.perform(text, reporter); 043 return reporter.getResult(); 044 } 045 046 private SuffixTreeCloneDetectionAlgorithm() { 047 } 048 049 private static TextSet createTextSet(CloneIndex index, Collection<Block> fileBlocks) { 050 Set<ByteArray> hashes = Sets.newHashSet(); 051 for (Block fileBlock : fileBlocks) { 052 hashes.add(fileBlock.getBlockHash()); 053 } 054 055 String originResourceId = fileBlocks.iterator().next().getResourceId(); 056 Map<String, List<Block>> fromIndex = retrieveFromIndex(index, originResourceId, hashes); 057 058 if (fromIndex.isEmpty() && hashes.size() == fileBlocks.size()) { // optimization for the case when there is no duplications 059 return null; 060 } 061 062 return createTextSet(fileBlocks, fromIndex); 063 } 064 065 private static TextSet createTextSet(Collection<Block> fileBlocks, Map<String, List<Block>> fromIndex) { 066 TextSet.Builder textSetBuilder = TextSet.builder(); 067 // TODO Godin: maybe we can reduce size of tree and so memory consumption by removing non-repeatable blocks 068 List<Block> sortedFileBlocks = Lists.newArrayList(fileBlocks); 069 Collections.sort(sortedFileBlocks, BLOCK_COMPARATOR); 070 textSetBuilder.add(sortedFileBlocks); 071 072 for (List<Block> list : fromIndex.values()) { 073 Collections.sort(list, BLOCK_COMPARATOR); 074 075 int i = 0; 076 while (i < list.size()) { 077 int j = i + 1; 078 while ((j < list.size()) && (list.get(j).getIndexInFile() == list.get(j - 1).getIndexInFile() + 1)) { 079 j++; 080 } 081 textSetBuilder.add(list.subList(i, j)); 082 i = j; 083 } 084 } 085 086 return textSetBuilder.build(); 087 } 088 089 private static Map<String, List<Block>> retrieveFromIndex(CloneIndex index, String originResourceId, Set<ByteArray> hashes) { 090 Map<String, List<Block>> collection = Maps.newHashMap(); 091 for (ByteArray hash : hashes) { 092 Collection<Block> blocks = index.getBySequenceHash(hash); 093 for (Block blockFromIndex : blocks) { 094 // Godin: skip blocks for this file if they come from index 095 String resourceId = blockFromIndex.getResourceId(); 096 if (!originResourceId.equals(resourceId)) { 097 List<Block> list = collection.get(resourceId); 098 if (list == null) { 099 list = Lists.newArrayList(); 100 collection.put(resourceId, list); 101 } 102 list.add(blockFromIndex); 103 } 104 } 105 } 106 return collection; 107 } 108 109 private static final Comparator<Block> BLOCK_COMPARATOR = new Comparator<Block>() { 110 public int compare(Block o1, Block o2) { 111 return o1.getIndexInFile() - o2.getIndexInFile(); 112 } 113 }; 114 115}