001    /*
002     * Sonar, open source software quality management tool.
003     * Copyright (C) 2008-2012 SonarSource
004     * mailto:contact AT sonarsource DOT com
005     *
006     * Sonar is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * Sonar is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with Sonar; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
019     */
020    package org.sonar.duplications.internal.pmd;
021    
022    import com.google.common.base.Throwables;
023    import com.google.common.collect.ImmutableList;
024    import net.sourceforge.pmd.cpd.SourceCode;
025    import net.sourceforge.pmd.cpd.TokenEntry;
026    import net.sourceforge.pmd.cpd.Tokenizer;
027    import net.sourceforge.pmd.cpd.Tokens;
028    import org.sonar.duplications.block.Block;
029    import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;
030    
031    import java.io.File;
032    import java.io.IOException;
033    import java.util.List;
034    
035    /**
036     * Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link TokensLine}s.
037     */
038    public class TokenizerBridge {
039    
040      private final Tokenizer tokenizer;
041      private final String encoding;
042      private final PmdBlockChunker blockBuilder;
043    
044      public TokenizerBridge(Tokenizer tokenizer, String encoding, int blockSize) {
045        this.tokenizer = tokenizer;
046        this.encoding = encoding;
047        this.blockBuilder = new PmdBlockChunker(blockSize);
048      }
049    
050      // TODO remove from here
051      public List<Block> chunk(String resourceId, File file) {
052        return blockBuilder.chunk(resourceId, chunk(file));
053      }
054    
055      public List<TokensLine> chunk(File file) {
056        SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(file, encoding));
057        Tokens tokens = new Tokens();
058        TokenEntry.clearImages();
059        try {
060          tokenizer.tokenize(sourceCode, tokens);
061        } catch (IOException e) {
062          throw Throwables.propagate(e);
063        }
064        TokenEntry.clearImages();
065        return convert(tokens.getTokens());
066      }
067    
068      /**
069       * We expect that implementation of {@link Tokenizer} is correct:
070       * tokens ordered by occurrence in source code and last token is EOF.
071       */
072      private static List<TokensLine> convert(List<TokenEntry> tokens) {
073        ImmutableList.Builder<TokensLine> result = ImmutableList.builder();
074        StringBuilder sb = new StringBuilder();
075        int startLine = Integer.MIN_VALUE;
076        int startIndex = 0;
077        int currentIndex = 0;
078        for (TokenEntry token : tokens) {
079          if (token != TokenEntry.EOF) {
080            String value = token.getValue();
081            int line = token.getBeginLine();
082            if (line != startLine) {
083              addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
084              startIndex = currentIndex + 1;
085              startLine = line;
086            }
087            currentIndex++;
088            sb.append(value);
089          }
090        }
091        addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
092        return result.build();
093      }
094    
095      private static void addNewTokensLine(ImmutableList.Builder<TokensLine> result, int startUnit, int endUnit, int startLine, StringBuilder sb) {
096        if (sb.length() != 0) {
097          result.add(new TokensLine(startUnit, endUnit, startLine, sb.toString().hashCode()));
098          sb.setLength(0);
099        }
100      }
101    
102    }