001 /* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2012 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020 package org.sonar.duplications.block; 021 022 import com.google.common.collect.Lists; 023 import org.sonar.duplications.statement.Statement; 024 025 import java.util.Collections; 026 import java.util.List; 027 028 /** 029 * Creates blocks from statements, each block will contain specified number of statements (<code>blockSize</code>) and 64-bits (8-bytes) hash value. 030 * Hash value computed using 031 * <a href="http://en.wikipedia.org/wiki/Rolling_hash#Rabin-Karp_rolling_hash">Rabin-Karp rolling hash</a> : 032 * <blockquote><pre> 033 * s[0]*31^(blockSize-1) + s[1]*31^(blockSize-2) + ... + s[blockSize-1] 034 * </pre></blockquote> 035 * using <code>long</code> arithmetic, where <code>s[i]</code> 036 * is the hash code of <code>String</code> (which is cached) for statement with number i. 037 * Thus running time - O(N), where N - number of statements. 038 * Implementation fully thread-safe. 039 */ 040 public class BlockChunker { 041 042 private static final long PRIME_BASE = 31; 043 044 private final int blockSize; 045 private final long power; 046 047 public BlockChunker(int blockSize) { 048 this.blockSize = blockSize; 049 050 long pow = 1; 051 for (int i = 0; i < blockSize - 1; i++) { 052 pow = pow * PRIME_BASE; 053 } 054 this.power = pow; 055 } 056 057 public List<Block> chunk(String resourceId, List<Statement> statements) { 058 if (statements.size() < blockSize) { 059 return Collections.emptyList(); 060 } 061 Statement[] statementsArr = statements.toArray(new Statement[statements.size()]); 062 List<Block> blocks = Lists.newArrayListWithCapacity(statementsArr.length - blockSize + 1); 063 long hash = 0; 064 int first = 0; 065 int last = 0; 066 for (; last < blockSize - 1; last++) { 067 hash = hash * PRIME_BASE + statementsArr[last].getValue().hashCode(); 068 } 069 Block.Builder blockBuilder = Block.builder().setResourceId(resourceId); 070 for (; last < statementsArr.length; last++, first++) { 071 Statement firstStatement = statementsArr[first]; 072 Statement lastStatement = statementsArr[last]; 073 // add last statement to hash 074 hash = hash * PRIME_BASE + lastStatement.getValue().hashCode(); 075 // create block 076 Block block = blockBuilder.setBlockHash(new ByteArray(hash)) 077 .setIndexInFile(first) 078 .setLines(firstStatement.getStartLine(), lastStatement.getEndLine()) 079 .build(); 080 blocks.add(block); 081 // remove first statement from hash 082 hash -= power * firstStatement.getValue().hashCode(); 083 } 084 return blocks; 085 } 086 087 public int getBlockSize() { 088 return blockSize; 089 } 090 091 }