001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2011 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020 package org.sonar.duplications.index;
021
022 import java.util.Collection;
023 import java.util.List;
024
025 import org.sonar.duplications.block.Block;
026 import org.sonar.duplications.block.ByteArray;
027 import org.sonar.duplications.utils.FastStringComparator;
028
029 import com.google.common.collect.Lists;
030
031 /**
032 * Provides an index optimized by memory.
033 * <p>
034 * Each object in Java has an overhead - see
035 * <a href="http://devblog.streamy.com/2009/07/24/determine-size-of-java-object-class/">"HOWTO: Determine the size of a Java Object or Class"</a>.
036 * So to optimize memory consumption, we use flat arrays, however this increases time of queries.
037 * During usual detection of duplicates most time consuming method is a {@link #getByResourceId(String)}:
038 * around 50% of time spent in this class and number of invocations of this method is 1% of total invocations,
039 * however total time spent in this class less than 1 second for small projects and around 2 seconds for projects like JDK.
040 * </p>
041 * <p>
042 * Note that this implementation currently does not support deletion, however it's possible to implement.
043 * </p>
044 */
045 public class PackedMemoryCloneIndex extends AbstractCloneIndex {
046
047 private static final int DEFAULT_INITIAL_CAPACITY = 1024;
048
049 private static final int BLOCK_INTS = 3;
050
051 private final int hashInts;
052
053 private final int blockInts;
054
055 /**
056 * Indicates that index requires sorting to perform queries.
057 */
058 private boolean sorted;
059
060 /**
061 * Current number of blocks in index.
062 */
063 private int size;
064
065 private String[] resourceIds;
066 private int[] blockData;
067
068 private int[] resourceIdsIndex;
069
070 public PackedMemoryCloneIndex() {
071 this(8, DEFAULT_INITIAL_CAPACITY);
072 }
073
074 /**
075 * @param hashBytes size of hash in bytes
076 * @param initialCapacity the initial capacity
077 */
078 public PackedMemoryCloneIndex(int hashBytes, int initialCapacity) {
079 this.sorted = false;
080 this.hashInts = hashBytes / 4;
081 this.blockInts = hashInts + BLOCK_INTS;
082 this.size = 0;
083 this.resourceIds = new String[initialCapacity];
084 this.blockData = new int[initialCapacity * blockInts];
085 this.resourceIdsIndex = new int[initialCapacity];
086 }
087
088 /**
089 * {@inheritDoc}
090 * <p>
091 * <strong>Note that this implementation does not guarantee that blocks would be sorted by index.</strong>
092 * </p>
093 */
094 public Collection<Block> getByResourceId(String resourceId) {
095 ensureSorted();
096
097 // prepare resourceId for binary search
098 resourceIds[size] = resourceId;
099 resourceIdsIndex[size] = size;
100
101 int index = DataUtils.binarySearch(byResourceId);
102
103 List<Block> result = Lists.newArrayList();
104 int realIndex = resourceIdsIndex[index];
105 while (index < size && FastStringComparator.INSTANCE.compare(resourceIds[realIndex], resourceId) == 0) {
106 // extract block (note that there is no need to extract resourceId)
107 int offset = realIndex * blockInts;
108 int[] hash = new int[hashInts];
109 for (int j = 0; j < hashInts; j++) {
110 hash[j] = blockData[offset++];
111 }
112 int indexInFile = blockData[offset++];
113 int firstLineNumber = blockData[offset++];
114 int lastLineNumber = blockData[offset];
115
116 result.add(new Block(resourceId, new ByteArray(hash), indexInFile, firstLineNumber, lastLineNumber));
117
118 index++;
119 realIndex = resourceIdsIndex[index];
120 }
121 return result;
122 }
123
124 /**
125 * {@inheritDoc}
126 */
127 public Collection<Block> getBySequenceHash(ByteArray sequenceHash) {
128 ensureSorted();
129
130 // prepare hash for binary search
131 int[] hash = sequenceHash.toIntArray();
132 if (hash.length != hashInts) {
133 throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length);
134 }
135 int offset = size * blockInts;
136 for (int i = 0; i < hashInts; i++) {
137 blockData[offset++] = hash[i];
138 }
139
140 int index = DataUtils.binarySearch(byBlockHash);
141
142 List<Block> result = Lists.newArrayList();
143 while (index < size && !isLessByHash(size, index)) {
144 // extract block (note that there is no need to extract hash)
145 String resourceId = resourceIds[index];
146 offset = index * blockInts + hashInts;
147 int indexInFile = blockData[offset++];
148 int firstLineNumber = blockData[offset++];
149 int lastLineNumber = blockData[offset];
150
151 result.add(new Block(resourceId, sequenceHash, indexInFile, firstLineNumber, lastLineNumber));
152 index++;
153 }
154 return result;
155 }
156
157 /**
158 * {@inheritDoc}
159 * <p>
160 * <strong>Note that this implementation allows insertion of two blocks with same index for one resource.</strong>
161 * </p>
162 */
163 public void insert(Block block) {
164 sorted = false;
165 ensureCapacity();
166
167 resourceIds[size] = block.getResourceId();
168
169 int[] hash = block.getBlockHash().toIntArray();
170 if (hash.length != hashInts) {
171 throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length);
172 }
173 int offset = size * blockInts;
174 for (int i = 0; i < hashInts; i++) {
175 blockData[offset++] = hash[i];
176 }
177 blockData[offset++] = block.getIndexInFile();
178 blockData[offset++] = block.getFirstLineNumber();
179 blockData[offset] = block.getLastLineNumber();
180
181 size++;
182 }
183
184 /**
185 * Increases the capacity, if necessary.
186 */
187 private void ensureCapacity() {
188 if (size < resourceIds.length) {
189 return;
190 }
191 int newCapacity = (resourceIds.length * 3) / 2 + 1;
192 // Increase size of resourceIds
193 String[] oldResourceIds = resourceIds;
194 resourceIds = new String[newCapacity];
195 System.arraycopy(oldResourceIds, 0, resourceIds, 0, oldResourceIds.length);
196 // Increase size of blockData
197 int[] oldBlockData = blockData;
198 blockData = new int[newCapacity * blockInts];
199 System.arraycopy(oldBlockData, 0, blockData, 0, oldBlockData.length);
200 // Increase size of byResourceIndices (no need to copy old, because would be restored in method ensureSorted)
201 resourceIdsIndex = new int[newCapacity];
202 sorted = false;
203 }
204
205 /**
206 * Performs sorting, if necessary.
207 */
208 private void ensureSorted() {
209 if (sorted) {
210 return;
211 }
212
213 ensureCapacity();
214
215 DataUtils.sort(byBlockHash);
216 for (int i = 0; i < size; i++) {
217 resourceIdsIndex[i] = i;
218 }
219 DataUtils.sort(byResourceId);
220
221 sorted = true;
222 }
223
224 private boolean isLessByHash(int i, int j) {
225 i *= blockInts;
226 j *= blockInts;
227 for (int k = 0; k < hashInts; k++, i++, j++) {
228 if (blockData[i] < blockData[j]) {
229 return true;
230 }
231 if (blockData[i] > blockData[j]) {
232 return false;
233 }
234 }
235 return false;
236 }
237
238 private final DataUtils.Sortable byBlockHash = new DataUtils.Sortable() {
239 public void swap(int i, int j) {
240 String tmp = resourceIds[i];
241 resourceIds[i] = resourceIds[j];
242 resourceIds[j] = tmp;
243
244 i *= blockInts;
245 j *= blockInts;
246 for (int k = 0; k < blockInts; k++, i++, j++) {
247 int x = blockData[i];
248 blockData[i] = blockData[j];
249 blockData[j] = x;
250 }
251 }
252
253 public boolean isLess(int i, int j) {
254 return isLessByHash(i, j);
255 }
256
257 public int size() {
258 return size;
259 }
260 };
261
262 private final DataUtils.Sortable byResourceId = new DataUtils.Sortable() {
263 public void swap(int i, int j) {
264 int tmp = resourceIdsIndex[i];
265 resourceIdsIndex[i] = resourceIdsIndex[j];
266 resourceIdsIndex[j] = tmp;
267 }
268
269 public boolean isLess(int i, int j) {
270 String s1 = resourceIds[resourceIdsIndex[i]];
271 String s2 = resourceIds[resourceIdsIndex[j]];
272 return FastStringComparator.INSTANCE.compare(s1, s2) < 0;
273 }
274
275 public int size() {
276 return size;
277 }
278 };
279
280 }