001/*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2012 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019 */
020package org.sonar.duplications.internal.pmd;
021
022import com.google.common.base.Throwables;
023import com.google.common.collect.ImmutableList;
024import net.sourceforge.pmd.cpd.SourceCode;
025import net.sourceforge.pmd.cpd.TokenEntry;
026import net.sourceforge.pmd.cpd.Tokenizer;
027import net.sourceforge.pmd.cpd.Tokens;
028import org.sonar.duplications.block.Block;
029import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;
030
031import java.io.File;
032import java.io.IOException;
033import java.util.List;
034
035/**
036 * Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link TokensLine}s.
037 */
038public class TokenizerBridge {
039
040  private final Tokenizer tokenizer;
041  private final String encoding;
042  private final PmdBlockChunker blockBuilder;
043
044  public TokenizerBridge(Tokenizer tokenizer, String encoding, int blockSize) {
045    this.tokenizer = tokenizer;
046    this.encoding = encoding;
047    this.blockBuilder = new PmdBlockChunker(blockSize);
048  }
049
050  // TODO remove from here
051  public List<Block> chunk(String resourceId, File file) {
052    return blockBuilder.chunk(resourceId, chunk(file));
053  }
054
055  public List<TokensLine> chunk(File file) {
056    SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
057    Tokens tokens = new Tokens();
058    TokenEntry.clearImages();
059    try {
060      tokenizer.tokenize(sourceCode, tokens);
061    } catch (IOException e) {
062      throw Throwables.propagate(e);
063    }
064    TokenEntry.clearImages();
065    return convert(tokens.getTokens());
066  }
067
068  /**
069   * We expect that implementation of {@link Tokenizer} is correct:
070   * tokens ordered by occurrence in source code and last token is EOF.
071   */
072  private static List<TokensLine> convert(List<TokenEntry> tokens) {
073    ImmutableList.Builder<TokensLine> result = ImmutableList.builder();
074    StringBuilder sb = new StringBuilder();
075    int startLine = Integer.MIN_VALUE;
076    int startIndex = 0;
077    int currentIndex = 0;
078    for (TokenEntry token : tokens) {
079      if (token != TokenEntry.EOF) {
080        String value = token.getValue();
081        int line = token.getBeginLine();
082        if (line != startLine) {
083          addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
084          startIndex = currentIndex + 1;
085          startLine = line;
086        }
087        currentIndex++;
088        sb.append(value);
089      }
090    }
091    addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
092    return result.build();
093  }
094
095  private static void addNewTokensLine(ImmutableList.Builder<TokensLine> result, int startUnit, int endUnit, int startLine, StringBuilder sb) {
096    if (sb.length() != 0) {
097      result.add(new TokensLine(startUnit, endUnit, startLine, sb.toString().hashCode()));
098      sb.setLength(0);
099    }
100  }
101
102}