001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2012 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020 package org.sonar.duplications.block;
021
022 import com.google.common.collect.Lists;
023 import org.sonar.duplications.statement.Statement;
024
025 import java.util.Collections;
026 import java.util.List;
027
028 /**
029 * Creates blocks from statements, each block will contain specified number of statements (<code>blockSize</code>) and 64-bits (8-bytes) hash value.
030 * Hash value computed using
031 * <a href="http://en.wikipedia.org/wiki/Rolling_hash#Rabin-Karp_rolling_hash">Rabin-Karp rolling hash</a> :
032 * <blockquote><pre>
033 * s[0]*31^(blockSize-1) + s[1]*31^(blockSize-2) + ... + s[blockSize-1]
034 * </pre></blockquote>
035 * using <code>long</code> arithmetic, where <code>s[i]</code>
036 * is the hash code of <code>String</code> (which is cached) for statement with number i.
037 * Thus running time - O(N), where N - number of statements.
038 * Implementation fully thread-safe.
039 */
040 public class BlockChunker {
041
042 private static final long PRIME_BASE = 31;
043
044 private final int blockSize;
045 private final long power;
046
047 public BlockChunker(int blockSize) {
048 this.blockSize = blockSize;
049
050 long pow = 1;
051 for (int i = 0; i < blockSize - 1; i++) {
052 pow = pow * PRIME_BASE;
053 }
054 this.power = pow;
055 }
056
057 public List<Block> chunk(String resourceId, List<Statement> statements) {
058 if (statements.size() < blockSize) {
059 return Collections.emptyList();
060 }
061 Statement[] statementsArr = statements.toArray(new Statement[statements.size()]);
062 List<Block> blocks = Lists.newArrayListWithCapacity(statementsArr.length - blockSize + 1);
063 long hash = 0;
064 int first = 0;
065 int last = 0;
066 for (; last < blockSize - 1; last++) {
067 hash = hash * PRIME_BASE + statementsArr[last].getValue().hashCode();
068 }
069 Block.Builder blockBuilder = Block.builder().setResourceId(resourceId);
070 for (; last < statementsArr.length; last++, first++) {
071 Statement firstStatement = statementsArr[first];
072 Statement lastStatement = statementsArr[last];
073 // add last statement to hash
074 hash = hash * PRIME_BASE + lastStatement.getValue().hashCode();
075 // create block
076 Block block = blockBuilder.setBlockHash(new ByteArray(hash))
077 .setIndexInFile(first)
078 .setLines(firstStatement.getStartLine(), lastStatement.getEndLine())
079 .build();
080 blocks.add(block);
081 // remove first statement from hash
082 hash -= power * firstStatement.getValue().hashCode();
083 }
084 return blocks;
085 }
086
087 public int getBlockSize() {
088 return blockSize;
089 }
090
091 }