001/*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2012 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019 */
020package org.sonar.duplications.index;
021
022import com.google.common.collect.Lists;
023import org.sonar.duplications.block.Block;
024import org.sonar.duplications.block.ByteArray;
025import org.sonar.duplications.utils.FastStringComparator;
026
027import java.util.Collection;
028import java.util.List;
029
030/**
031 * Provides an index optimized by memory.
032 * <p>
033 * Each object in Java has an overhead - see
034 * <a href="http://devblog.streamy.com/2009/07/24/determine-size-of-java-object-class/">"HOWTO: Determine the size of a Java Object or Class"</a>.
035 * So to optimize memory consumption, we use flat arrays, however this increases time of queries.
036 * During  usual detection of duplicates most time consuming method is a {@link #getByResourceId(String)}:
037 * around 50% of time spent in this class and number of invocations of this method is 1% of total invocations,
038 * however total time spent in this class less than 1 second for small projects and around 2 seconds for projects like JDK.
039 * </p>
040 * <p>
041 * Note that this implementation currently does not support deletion, however it's possible to implement.
042 * </p>
043 */
044public class PackedMemoryCloneIndex extends AbstractCloneIndex {
045
046  private static final int DEFAULT_INITIAL_CAPACITY = 1024;
047
048  private static final int BLOCK_INTS = 5;
049
050  private final int hashInts;
051
052  private final int blockInts;
053
054  /**
055   * Indicates that index requires sorting to perform queries.
056   */
057  private boolean sorted;
058
059  /**
060   * Current number of blocks in index.
061   */
062  private int size;
063
064  private String[] resourceIds;
065  private int[] blockData;
066
067  private int[] resourceIdsIndex;
068
069  private final Block.Builder blockBuilder = Block.builder();
070
071  public PackedMemoryCloneIndex() {
072    this(8, DEFAULT_INITIAL_CAPACITY);
073  }
074
075  /**
076   * @param hashBytes size of hash in bytes
077   * @param initialCapacity the initial capacity
078   */
079  public PackedMemoryCloneIndex(int hashBytes, int initialCapacity) {
080    this.sorted = false;
081    this.hashInts = hashBytes / 4;
082    this.blockInts = hashInts + BLOCK_INTS;
083    this.size = 0;
084    this.resourceIds = new String[initialCapacity];
085    this.blockData = new int[initialCapacity * blockInts];
086    this.resourceIdsIndex = new int[initialCapacity];
087  }
088
089  /**
090   * {@inheritDoc}
091   * <p>
092   * <strong>Note that this implementation does not guarantee that blocks would be sorted by index.</strong>
093   * </p>
094   */
095  public Collection<Block> getByResourceId(String resourceId) {
096    ensureSorted();
097
098    // prepare resourceId for binary search
099    resourceIds[size] = resourceId;
100    resourceIdsIndex[size] = size;
101
102    int index = DataUtils.binarySearch(byResourceId);
103
104    List<Block> result = Lists.newArrayList();
105    int realIndex = resourceIdsIndex[index];
106    while (index < size && FastStringComparator.INSTANCE.compare(resourceIds[realIndex], resourceId) == 0) {
107      // extract block (note that there is no need to extract resourceId)
108      int offset = realIndex * blockInts;
109      int[] hash = new int[hashInts];
110      for (int j = 0; j < hashInts; j++) {
111        hash[j] = blockData[offset++];
112      }
113      int indexInFile = blockData[offset++];
114      int firstLineNumber = blockData[offset++];
115      int lastLineNumber = blockData[offset++];
116      int startUnit = blockData[offset++];
117      int endUnit = blockData[offset];
118
119      Block block = blockBuilder
120          .setResourceId(resourceId)
121          .setBlockHash(new ByteArray(hash))
122          .setIndexInFile(indexInFile)
123          .setLines(firstLineNumber, lastLineNumber)
124          .setUnit(startUnit, endUnit)
125          .build();
126      result.add(block);
127
128      index++;
129      realIndex = resourceIdsIndex[index];
130    }
131    return result;
132  }
133
134  /**
135   * {@inheritDoc}
136   */
137  public Collection<Block> getBySequenceHash(ByteArray sequenceHash) {
138    ensureSorted();
139
140    // prepare hash for binary search
141    int[] hash = sequenceHash.toIntArray();
142    if (hash.length != hashInts) {
143      throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length);
144    }
145    int offset = size * blockInts;
146    for (int i = 0; i < hashInts; i++) {
147      blockData[offset++] = hash[i];
148    }
149
150    int index = DataUtils.binarySearch(byBlockHash);
151
152    List<Block> result = Lists.newArrayList();
153    while (index < size && !isLessByHash(size, index)) {
154      // extract block (note that there is no need to extract hash)
155      String resourceId = resourceIds[index];
156      offset = index * blockInts + hashInts;
157      int indexInFile = blockData[offset++];
158      int firstLineNumber = blockData[offset++];
159      int lastLineNumber = blockData[offset++];
160      int startUnit = blockData[offset++];
161      int endUnit = blockData[offset];
162
163      Block block = blockBuilder
164          .setResourceId(resourceId)
165          .setBlockHash(sequenceHash)
166          .setIndexInFile(indexInFile)
167          .setLines(firstLineNumber, lastLineNumber)
168          .setUnit(startUnit, endUnit)
169          .build();
170      result.add(block);
171      index++;
172    }
173    return result;
174  }
175
176  /**
177   * {@inheritDoc}
178   * <p>
179   * <strong>Note that this implementation allows insertion of two blocks with same index for one resource.</strong>
180   * </p>
181   */
182  public void insert(Block block) {
183    sorted = false;
184    ensureCapacity();
185
186    resourceIds[size] = block.getResourceId();
187
188    int[] hash = block.getBlockHash().toIntArray();
189    if (hash.length != hashInts) {
190      throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length);
191    }
192    int offset = size * blockInts;
193    for (int i = 0; i < hashInts; i++) {
194      blockData[offset++] = hash[i];
195    }
196    blockData[offset++] = block.getIndexInFile();
197    blockData[offset++] = block.getStartLine();
198    blockData[offset++] = block.getEndLine();
199    blockData[offset++] = block.getStartUnit();
200    blockData[offset] = block.getEndUnit();
201
202    size++;
203  }
204
205  /**
206   * Increases the capacity, if necessary.
207   */
208  private void ensureCapacity() {
209    if (size < resourceIds.length) {
210      return;
211    }
212    int newCapacity = (resourceIds.length * 3) / 2 + 1;
213    // Increase size of resourceIds
214    String[] oldResourceIds = resourceIds;
215    resourceIds = new String[newCapacity];
216    System.arraycopy(oldResourceIds, 0, resourceIds, 0, oldResourceIds.length);
217    // Increase size of blockData
218    int[] oldBlockData = blockData;
219    blockData = new int[newCapacity * blockInts];
220    System.arraycopy(oldBlockData, 0, blockData, 0, oldBlockData.length);
221    // Increase size of byResourceIndices (no need to copy old, because would be restored in method ensureSorted)
222    resourceIdsIndex = new int[newCapacity];
223    sorted = false;
224  }
225
226  /**
227   * Performs sorting, if necessary.
228   */
229  private void ensureSorted() {
230    if (sorted) {
231      return;
232    }
233
234    ensureCapacity();
235
236    DataUtils.sort(byBlockHash);
237    for (int i = 0; i < size; i++) {
238      resourceIdsIndex[i] = i;
239    }
240    DataUtils.sort(byResourceId);
241
242    sorted = true;
243  }
244
245  private boolean isLessByHash(int i, int j) {
246    i *= blockInts;
247    j *= blockInts;
248    for (int k = 0; k < hashInts; k++, i++, j++) {
249      if (blockData[i] < blockData[j]) {
250        return true;
251      }
252      if (blockData[i] > blockData[j]) {
253        return false;
254      }
255    }
256    return false;
257  }
258
259  private final DataUtils.Sortable byBlockHash = new DataUtils.Sortable() {
260    public void swap(int i, int j) {
261      String tmp = resourceIds[i];
262      resourceIds[i] = resourceIds[j];
263      resourceIds[j] = tmp;
264
265      i *= blockInts;
266      j *= blockInts;
267      for (int k = 0; k < blockInts; k++, i++, j++) {
268        int x = blockData[i];
269        blockData[i] = blockData[j];
270        blockData[j] = x;
271      }
272    }
273
274    public boolean isLess(int i, int j) {
275      return isLessByHash(i, j);
276    }
277
278    public int size() {
279      return size;
280    }
281  };
282
283  private final DataUtils.Sortable byResourceId = new DataUtils.Sortable() {
284    public void swap(int i, int j) {
285      int tmp = resourceIdsIndex[i];
286      resourceIdsIndex[i] = resourceIdsIndex[j];
287      resourceIdsIndex[j] = tmp;
288    }
289
290    public boolean isLess(int i, int j) {
291      String s1 = resourceIds[resourceIdsIndex[i]];
292      String s2 = resourceIds[resourceIdsIndex[j]];
293      return FastStringComparator.INSTANCE.compare(s1, s2) < 0;
294    }
295
296    public int size() {
297      return size;
298    }
299  };
300
301}