/*
 * Sonar, open source software quality management tool.
 * Copyright (C) 2008-2011 SonarSource
 * mailto:contact AT sonarsource DOT com
 *
 * Sonar is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * Sonar is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Sonar; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
 */

/**
 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
 */
package net.sourceforge.pmd.cpd;

import java.io.BufferedReader;
import java.io.CharArrayReader;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;

/**
 * This class does a best-guess try-anything tokenization.
 *
 * <p>The source is read line by line and each line is split on the characters in
 * {@link #TOKENS}. The delimiter characters themselves are reported as tokens,
 * except for the space and the tab, which are dropped.
 *
 * @author jheintz
 */
public class AnyTokenizer implements Tokenizer {

  /** Characters that separate tokens; each one other than space and tab is also emitted as a token. */
  public static final String TOKENS = " \t!#$%^&*(){}-=+<>/\\`~;:";

  /**
   * Splits the given source into tokens and appends one entry per token to {@code tokenEntries}.
   *
   * <p>Line numbers attached to the entries start at 1. An end-of-file entry
   * ({@code TokenEntry.getEOF()}) is always appended last, even when tokenization fails part-way.
   * Failures are reported with {@code printStackTrace()} and are not rethrown.
   *
   * <p>The end of a line is detected with {@link StringTokenizer#hasMoreTokens()} rather than by
   * catching {@link NoSuchElementException}, so such an exception raised while building or storing
   * an entry is reported like any other failure instead of being mistaken for "no more tokens".
   *
   * @param sourceCode the source whose code buffer is tokenized and whose file name is recorded
   * @param tokenEntries the collector receiving the token entries
   */
  public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuffer sb = sourceCode.getCodeBuffer();
    BufferedReader reader = new BufferedReader(new CharArrayReader(sb.toString().toCharArray()));
    try {
      int lineNumber = 1;
      for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        // 'true' makes the tokenizer hand back the delimiters as tokens too.
        StringTokenizer tokenizer = new StringTokenizer(line, TOKENS, true);
        while (tokenizer.hasMoreTokens()) {
          String token = tokenizer.nextToken();
          // Space and tab only separate tokens; they are never recorded.
          if (!token.equals(" ") && !token.equals("\t")) {
            tokenEntries.add(new TokenEntry(token, sourceCode.getFileName(), lineNumber));
          }
        }
        lineNumber++;
      }
    } catch (Exception ex) {
      // Best effort: report the failure and keep whatever was tokenized so far.
      ex.printStackTrace();
    } finally {
      try {
        reader.close();
      } catch (Exception ignored) {
        // The reader wraps an in-memory buffer, so there is nothing to recover here;
        // the EOF entry below must be appended regardless.
      }
      tokenEntries.add(TokenEntry.getEOF());
    }
  }
}