001/* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2012 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020package org.sonar.duplications.internal.pmd; 021 022import com.google.common.base.Throwables; 023import com.google.common.collect.ImmutableList; 024import net.sourceforge.pmd.cpd.SourceCode; 025import net.sourceforge.pmd.cpd.TokenEntry; 026import net.sourceforge.pmd.cpd.Tokenizer; 027import net.sourceforge.pmd.cpd.Tokens; 028import org.sonar.duplications.block.Block; 029import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache; 030 031import java.io.File; 032import java.io.IOException; 033import java.util.List; 034 035/** 036 * Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link TokensLine}s. 037 */ 038public class TokenizerBridge { 039 040 private final Tokenizer tokenizer; 041 private final String encoding; 042 private final PmdBlockChunker blockBuilder; 043 044 public TokenizerBridge(Tokenizer tokenizer, String encoding, int blockSize) { 045 this.tokenizer = tokenizer; 046 this.encoding = encoding; 047 this.blockBuilder = new PmdBlockChunker(blockSize); 048 } 049 050 // TODO remove from here 051 public List<Block> chunk(String resourceId, File file) { 052 return blockBuilder.chunk(resourceId, chunk(file)); 053 } 054 055 public List<TokensLine> chunk(File file) { 056 SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding)); 057 Tokens tokens = new Tokens(); 058 TokenEntry.clearImages(); 059 try { 060 tokenizer.tokenize(sourceCode, tokens); 061 } catch (IOException e) { 062 throw Throwables.propagate(e); 063 } 064 TokenEntry.clearImages(); 065 return convert(tokens.getTokens()); 066 } 067 068 /** 069 * We expect that implementation of {@link Tokenizer} is correct: 070 * tokens ordered by occurrence in source code and last token is EOF. 071 */ 072 private static List<TokensLine> convert(List<TokenEntry> tokens) { 073 ImmutableList.Builder<TokensLine> result = ImmutableList.builder(); 074 StringBuilder sb = new StringBuilder(); 075 int startLine = Integer.MIN_VALUE; 076 int startIndex = 0; 077 int currentIndex = 0; 078 for (TokenEntry token : tokens) { 079 if (token != TokenEntry.EOF) { 080 String value = token.getValue(); 081 int line = token.getBeginLine(); 082 if (line != startLine) { 083 addNewTokensLine(result, startIndex, currentIndex, startLine, sb); 084 startIndex = currentIndex + 1; 085 startLine = line; 086 } 087 currentIndex++; 088 sb.append(value); 089 } 090 } 091 addNewTokensLine(result, startIndex, currentIndex, startLine, sb); 092 return result.build(); 093 } 094 095 private static void addNewTokensLine(ImmutableList.Builder<TokensLine> result, int startUnit, int endUnit, int startLine, StringBuilder sb) { 096 if (sb.length() != 0) { 097 result.add(new TokensLine(startUnit, endUnit, startLine, sb.toString().hashCode())); 098 sb.setLength(0); 099 } 100 } 101 102}