001 /* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2011 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020 021 /** 022 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html 023 */ 024 package org.sonar.duplications.cpd; 025 026 import java.util.ArrayList; 027 import java.util.Collections; 028 import java.util.HashMap; 029 import java.util.HashSet; 030 import java.util.Iterator; 031 import java.util.List; 032 import java.util.Map; 033 import java.util.Set; 034 035 import net.sourceforge.pmd.cpd.TokenEntry; 036 037 public class MatchCollector { 038 039 private MatchAlgorithm ma; 040 private Map<Match.MatchCode, Match> startMap = new HashMap<Match.MatchCode, Match>(); 041 private Map<String, List<Match>> fileMap = new HashMap<String, List<Match>>(); 042 043 public MatchCollector(MatchAlgorithm ma) { 044 this.ma = ma; 045 } 046 047 public void collect(List<TokenEntry> marks) { 048 // first get a pairwise collection of all maximal matches 049 for (int i = 0; i < marks.size() - 1; i++) { 050 TokenEntry mark1 = marks.get(i); 051 for (int j = i + 1; j < marks.size(); j++) { 052 TokenEntry mark2 = marks.get(j); 053 int diff = mark1.getIndex() - mark2.getIndex(); 054 if ( -diff < ma.getMinimumTileSize()) { 055 continue; 056 } 057 if (hasPreviousDupe(mark1, mark2)) { 058 continue; 059 } 060 061 // "match too small" check 062 int dupes = countDuplicateTokens(mark1, mark2); 063 if (dupes < ma.getMinimumTileSize()) { 064 continue; 065 } 066 // is it still too close together 067 if (diff + dupes >= 1) { 068 continue; 069 } 070 determineMatch(mark1, mark2, dupes); 071 } 072 } 073 } 074 075 @SuppressWarnings("PMD.CompareObjectsWithEquals") 076 public List<Match> getMatches() { 077 List<Match> matchList = new ArrayList<Match>(startMap.values()); 078 Collections.sort(matchList); 079 Set<Match.MatchCode> matchSet = new HashSet<Match.MatchCode>(); 080 Match.MatchCode matchCode = new Match.MatchCode(); 081 for (int i = matchList.size(); i > 1; i--) { 082 Match match1 = matchList.get(i - 1); 083 TokenEntry mark1 = match1.getMarkSet().iterator().next(); 084 matchSet.clear(); 085 matchSet.add(match1.getMatchCode()); 086 for (int j = i - 1; j > 0; j--) { 087 Match match2 = matchList.get(j - 1); 088 if (match1.getTokenCount() != match2.getTokenCount()) { 089 break; 090 } 091 TokenEntry mark2 = null; 092 for (Iterator<TokenEntry> iter = match2.getMarkSet().iterator(); iter.hasNext();) { 093 mark2 = iter.next(); 094 if (mark2 != mark1) { 095 break; 096 } 097 } 098 int dupes = countDuplicateTokens(mark1, mark2); 099 if (dupes < match1.getTokenCount()) { 100 break; 101 } 102 matchSet.add(match2.getMatchCode()); 103 match1.getMarkSet().addAll(match2.getMarkSet()); 104 matchList.remove(i - 2); 105 i--; 106 } 107 if (matchSet.size() == 1) { 108 continue; 109 } 110 // prune the mark set 111 Set<TokenEntry> pruned = match1.getMarkSet(); 112 boolean done = false; 113 ArrayList<TokenEntry> a1 = new ArrayList<TokenEntry>(match1.getMarkSet()); 114 Collections.sort(a1); 115 for (int outer = 0; outer < a1.size() - 1 && !done; outer++) { 116 TokenEntry cmark1 = a1.get(outer); 117 for (int inner = outer + 1; inner < a1.size() && !done; inner++) { 118 TokenEntry cmark2 = a1.get(inner); 119 matchCode.setFirst(cmark1.getIndex()); 120 matchCode.setSecond(cmark2.getIndex()); 121 if ( !matchSet.contains(matchCode)) { 122 if (pruned.size() > 2) { 123 pruned.remove(cmark2); 124 } 125 if (pruned.size() == 2) { 126 done = true; 127 } 128 } 129 } 130 } 131 } 132 return matchList; 133 } 134 135 /** 136 * A greedy algorithm for determining non-overlapping matches 137 */ 138 private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) { 139 Match match = new Match(dupes, mark1, mark2); 140 String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID(); 141 List<Match> pairMatches = fileMap.get(fileKey); 142 if (pairMatches == null) { 143 pairMatches = new ArrayList<Match>(); 144 fileMap.put(fileKey, pairMatches); 145 } 146 boolean add = true; 147 for (int i = 0; i < pairMatches.size(); i++) { 148 Match other = pairMatches.get(i); 149 if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex() > 0) { 150 boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0; 151 if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0)) 152 || ( !ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) { 153 if (other.getTokenCount() >= match.getTokenCount()) { 154 add = false; 155 break; 156 } else { 157 pairMatches.remove(i); 158 startMap.remove(other.getMatchCode()); 159 } 160 } 161 } 162 } 163 if (add) { 164 pairMatches.add(match); 165 startMap.put(match.getMatchCode(), match); 166 } 167 } 168 169 private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) { 170 if (mark1.getIndex() == 0) { 171 return false; 172 } 173 return !matchEnded(ma.tokenAt( -1, mark1), ma.tokenAt( -1, mark2)); 174 } 175 176 private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) { 177 int index = 0; 178 while ( !matchEnded(ma.tokenAt(index, mark1), ma.tokenAt(index, mark2))) { 179 index++; 180 } 181 return index; 182 } 183 184 private boolean matchEnded(TokenEntry token1, TokenEntry token2) { 185 return token1.getIdentifier() != token2.getIdentifier() || token1 == TokenEntry.EOF || token2 == TokenEntry.EOF; 186 } 187 }