001 /* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2011 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020 package org.sonar.duplications.index; 021 022 import java.util.Collection; 023 import java.util.List; 024 025 import org.sonar.duplications.block.Block; 026 import org.sonar.duplications.block.ByteArray; 027 import org.sonar.duplications.utils.FastStringComparator; 028 029 import com.google.common.collect.Lists; 030 031 /** 032 * Provides an index optimized by memory. 033 * <p> 034 * Each object in Java has an overhead - see 035 * <a href="http://devblog.streamy.com/2009/07/24/determine-size-of-java-object-class/">"HOWTO: Determine the size of a Java Object or Class"</a>. 036 * So to optimize memory consumption, we use flat arrays, however this increases time of queries. 037 * During usual detection of duplicates most time consuming method is a {@link #getByResourceId(String)}: 038 * around 50% of time spent in this class and number of invocations of this method is 1% of total invocations, 039 * however total time spent in this class less than 1 second for small projects and around 2 seconds for projects like JDK. 040 * </p> 041 * <p> 042 * Note that this implementation currently does not support deletion, however it's possible to implement. 043 * </p> 044 */ 045 public class PackedMemoryCloneIndex extends AbstractCloneIndex { 046 047 private static final int DEFAULT_INITIAL_CAPACITY = 1024; 048 049 private static final int BLOCK_INTS = 3; 050 051 private final int hashInts; 052 053 private final int blockInts; 054 055 /** 056 * Indicates that index requires sorting to perform queries. 057 */ 058 private boolean sorted; 059 060 /** 061 * Current number of blocks in index. 062 */ 063 private int size; 064 065 private String[] resourceIds; 066 private int[] blockData; 067 068 private int[] resourceIdsIndex; 069 070 public PackedMemoryCloneIndex() { 071 this(8, DEFAULT_INITIAL_CAPACITY); 072 } 073 074 /** 075 * @param hashBytes size of hash in bytes 076 * @param initialCapacity the initial capacity 077 */ 078 public PackedMemoryCloneIndex(int hashBytes, int initialCapacity) { 079 this.sorted = false; 080 this.hashInts = hashBytes / 4; 081 this.blockInts = hashInts + BLOCK_INTS; 082 this.size = 0; 083 this.resourceIds = new String[initialCapacity]; 084 this.blockData = new int[initialCapacity * blockInts]; 085 this.resourceIdsIndex = new int[initialCapacity]; 086 } 087 088 /** 089 * {@inheritDoc} 090 * <p> 091 * <strong>Note that this implementation does not guarantee that blocks would be sorted by index.</strong> 092 * </p> 093 */ 094 public Collection<Block> getByResourceId(String resourceId) { 095 ensureSorted(); 096 097 // prepare resourceId for binary search 098 resourceIds[size] = resourceId; 099 resourceIdsIndex[size] = size; 100 101 int index = DataUtils.binarySearch(byResourceId); 102 103 List<Block> result = Lists.newArrayList(); 104 int realIndex = resourceIdsIndex[index]; 105 while (index < size && FastStringComparator.INSTANCE.compare(resourceIds[realIndex], resourceId) == 0) { 106 // extract block (note that there is no need to extract resourceId) 107 int offset = realIndex * blockInts; 108 int[] hash = new int[hashInts]; 109 for (int j = 0; j < hashInts; j++) { 110 hash[j] = blockData[offset++]; 111 } 112 int indexInFile = blockData[offset++]; 113 int firstLineNumber = blockData[offset++]; 114 int lastLineNumber = blockData[offset]; 115 116 result.add(new Block(resourceId, new ByteArray(hash), indexInFile, firstLineNumber, lastLineNumber)); 117 118 index++; 119 realIndex = resourceIdsIndex[index]; 120 } 121 return result; 122 } 123 124 /** 125 * {@inheritDoc} 126 */ 127 public Collection<Block> getBySequenceHash(ByteArray sequenceHash) { 128 ensureSorted(); 129 130 // prepare hash for binary search 131 int[] hash = sequenceHash.toIntArray(); 132 if (hash.length != hashInts) { 133 throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length); 134 } 135 int offset = size * blockInts; 136 for (int i = 0; i < hashInts; i++) { 137 blockData[offset++] = hash[i]; 138 } 139 140 int index = DataUtils.binarySearch(byBlockHash); 141 142 List<Block> result = Lists.newArrayList(); 143 while (index < size && !isLessByHash(size, index)) { 144 // extract block (note that there is no need to extract hash) 145 String resourceId = resourceIds[index]; 146 offset = index * blockInts + hashInts; 147 int indexInFile = blockData[offset++]; 148 int firstLineNumber = blockData[offset++]; 149 int lastLineNumber = blockData[offset]; 150 151 result.add(new Block(resourceId, sequenceHash, indexInFile, firstLineNumber, lastLineNumber)); 152 index++; 153 } 154 return result; 155 } 156 157 /** 158 * {@inheritDoc} 159 * <p> 160 * <strong>Note that this implementation allows insertion of two blocks with same index for one resource.</strong> 161 * </p> 162 */ 163 public void insert(Block block) { 164 sorted = false; 165 ensureCapacity(); 166 167 resourceIds[size] = block.getResourceId(); 168 169 int[] hash = block.getBlockHash().toIntArray(); 170 if (hash.length != hashInts) { 171 throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length); 172 } 173 int offset = size * blockInts; 174 for (int i = 0; i < hashInts; i++) { 175 blockData[offset++] = hash[i]; 176 } 177 blockData[offset++] = block.getIndexInFile(); 178 blockData[offset++] = block.getFirstLineNumber(); 179 blockData[offset] = block.getLastLineNumber(); 180 181 size++; 182 } 183 184 /** 185 * Increases the capacity, if necessary. 186 */ 187 private void ensureCapacity() { 188 if (size < resourceIds.length) { 189 return; 190 } 191 int newCapacity = (resourceIds.length * 3) / 2 + 1; 192 // Increase size of resourceIds 193 String[] oldResourceIds = resourceIds; 194 resourceIds = new String[newCapacity]; 195 System.arraycopy(oldResourceIds, 0, resourceIds, 0, oldResourceIds.length); 196 // Increase size of blockData 197 int[] oldBlockData = blockData; 198 blockData = new int[newCapacity * blockInts]; 199 System.arraycopy(oldBlockData, 0, blockData, 0, oldBlockData.length); 200 // Increase size of byResourceIndices (no need to copy old, because would be restored in method ensureSorted) 201 resourceIdsIndex = new int[newCapacity]; 202 sorted = false; 203 } 204 205 /** 206 * Performs sorting, if necessary. 207 */ 208 private void ensureSorted() { 209 if (sorted) { 210 return; 211 } 212 213 ensureCapacity(); 214 215 DataUtils.sort(byBlockHash); 216 for (int i = 0; i < size; i++) { 217 resourceIdsIndex[i] = i; 218 } 219 DataUtils.sort(byResourceId); 220 221 sorted = true; 222 } 223 224 private boolean isLessByHash(int i, int j) { 225 i *= blockInts; 226 j *= blockInts; 227 for (int k = 0; k < hashInts; k++, i++, j++) { 228 if (blockData[i] < blockData[j]) { 229 return true; 230 } 231 if (blockData[i] > blockData[j]) { 232 return false; 233 } 234 } 235 return false; 236 } 237 238 private final DataUtils.Sortable byBlockHash = new DataUtils.Sortable() { 239 public void swap(int i, int j) { 240 String tmp = resourceIds[i]; 241 resourceIds[i] = resourceIds[j]; 242 resourceIds[j] = tmp; 243 244 i *= blockInts; 245 j *= blockInts; 246 for (int k = 0; k < blockInts; k++, i++, j++) { 247 int x = blockData[i]; 248 blockData[i] = blockData[j]; 249 blockData[j] = x; 250 } 251 } 252 253 public boolean isLess(int i, int j) { 254 return isLessByHash(i, j); 255 } 256 257 public int size() { 258 return size; 259 } 260 }; 261 262 private final DataUtils.Sortable byResourceId = new DataUtils.Sortable() { 263 public void swap(int i, int j) { 264 int tmp = resourceIdsIndex[i]; 265 resourceIdsIndex[i] = resourceIdsIndex[j]; 266 resourceIdsIndex[j] = tmp; 267 } 268 269 public boolean isLess(int i, int j) { 270 String s1 = resourceIds[resourceIdsIndex[i]]; 271 String s2 = resourceIds[resourceIdsIndex[j]]; 272 return FastStringComparator.INSTANCE.compare(s1, s2) < 0; 273 } 274 275 public int size() { 276 return size; 277 } 278 }; 279 280 }