001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2012 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020 package org.sonar.duplications.internal.pmd;
021
022 import com.google.common.base.Throwables;
023 import com.google.common.collect.ImmutableList;
024 import net.sourceforge.pmd.cpd.SourceCode;
025 import net.sourceforge.pmd.cpd.TokenEntry;
026 import net.sourceforge.pmd.cpd.Tokenizer;
027 import net.sourceforge.pmd.cpd.Tokens;
028 import org.sonar.duplications.block.Block;
029 import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;
030
031 import java.io.File;
032 import java.io.IOException;
033 import java.util.List;
034
035 /**
036 * Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link TokensLine}s.
037 */
038 public class TokenizerBridge {
039
040 private final Tokenizer tokenizer;
041 private final String encoding;
042 private final PmdBlockChunker blockBuilder;
043
044 public TokenizerBridge(Tokenizer tokenizer, String encoding, int blockSize) {
045 this.tokenizer = tokenizer;
046 this.encoding = encoding;
047 this.blockBuilder = new PmdBlockChunker(blockSize);
048 }
049
050 // TODO remove from here
051 public List<Block> chunk(String resourceId, File file) {
052 return blockBuilder.chunk(resourceId, chunk(file));
053 }
054
055 public List<TokensLine> chunk(File file) {
056 SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
057 Tokens tokens = new Tokens();
058 TokenEntry.clearImages();
059 try {
060 tokenizer.tokenize(sourceCode, tokens);
061 } catch (IOException e) {
062 throw Throwables.propagate(e);
063 }
064 TokenEntry.clearImages();
065 return convert(tokens.getTokens());
066 }
067
068 /**
069 * We expect that implementation of {@link Tokenizer} is correct:
070 * tokens ordered by occurrence in source code and last token is EOF.
071 */
072 private static List<TokensLine> convert(List<TokenEntry> tokens) {
073 ImmutableList.Builder<TokensLine> result = ImmutableList.builder();
074 StringBuilder sb = new StringBuilder();
075 int startLine = Integer.MIN_VALUE;
076 int startIndex = 0;
077 int currentIndex = 0;
078 for (TokenEntry token : tokens) {
079 if (token != TokenEntry.EOF) {
080 String value = token.getValue();
081 int line = token.getBeginLine();
082 if (line != startLine) {
083 addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
084 startIndex = currentIndex + 1;
085 startLine = line;
086 }
087 currentIndex++;
088 sb.append(value);
089 }
090 }
091 addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
092 return result.build();
093 }
094
095 private static void addNewTokensLine(ImmutableList.Builder<TokensLine> result, int startUnit, int endUnit, int startLine, StringBuilder sb) {
096 if (sb.length() != 0) {
097 result.add(new TokensLine(startUnit, endUnit, startLine, sb.toString().hashCode()));
098 sb.setLength(0);
099 }
100 }
101
102 }