package net.sf.logdistiller.plugins;

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.commons.lang.StringUtils;

import net.sf.logdistiller.LogDistillation;
import net.sf.logdistiller.LogDistiller;
import net.sf.logdistiller.LogEvent;
import net.sf.logdistiller.LogType;
import net.sf.logdistiller.Plugin;
import net.sf.logdistiller.PluginConfigException;
import net.sf.logdistiller.ReportFormat;

/**
 * Counts the frequency of each attribute's value.
 * <p>
 * Parameters:
 * </p>
 * <ul>
 * <li><b>value</b> (deprecated since 1.1): definition of the value that will be taken into account
 * <ul>
 * <li><code>attribute</code> (mandatory): the attribute's name</li>
 * <li><code>regexp</code>: use a regexp to extract value taken in account for the first attribute</li>
 * <li><code>regexp.group</code> (default: -1): which group to extract? If negative, all groups will be
 * concatenated</li>
 * <li><code>attribute2</code> (default: none): second attribute to append</li>
 * <li><code>attribute3</code> (default: none): third attribute to append</li>
 * </ul>
 * </li>
 * <li><b>value</b>: definition of the value that will be taken into account
 * <ul>
 * <li><code>attributes</code> (mandatory): the attributes' names, comma separated (no count limit)</li>
 * </ul>
 * </li>
 * <li><b>sorting options</b>
 * <ul>
 * <li><code>sortItem</code> (default: <code>count</code>): sort on which item? <code>count</code>,
 * <code>value</code>, <code>valueLength</code>. Any other value is rejected with a
 * <code>PluginConfigException</code>.</li>
 * <li><code>reverseOrder</code> (default: <code>false</code>): by default, most frequent value are displayed
 * first</li>
 * </ul>
 * </li>
 * <li><b>reporting</b>
 * <ul>
 * <li><code>valueDescriptions</code>: description to insert in the report instead of corresponding value (in java
 * properties format)</li>
 * <li><code>maxGlobalReport</code> (default: 5): maximum number of values inserted in global report</li>
 * <li><code>maxGroupReport</code> (default: 25): maximum number of values inserted in group report</li>
 * </ul>
 * </li>
 * </ul>
 *
 * @see SamplingPlugin
 * @since 0.6
 */
public class FreqPlugin
    extends LogDistillation.Plugin
{
    private static final long serialVersionUID = -5162563816094662936L;

    public final static String ID = "freq";

    public final static Plugin TYPE = new Type();

    /** Accepted values for the <code>sortItem</code> parameter; indexes match the two arrays below. */
    private final static String[] SORT_ITEM_VALUES = { "count", "value", "valueLength" };

    private final static String[] SORT_ITEM_DESCRIPTIONS = { "count", "attribute's value", "attribute's value length" };

    private final static FreqOrder[] SORT_ITEM_COMPARATORS =
        { new CountOrder(), new ValueOrder(), new ValueLengthOrder() };

    private final Map freqs = new HashMap(); // key = attribute's value (String), value = Freq instance

    /** Sorted Freq instances: stays <code>null</code> until {@link #end()} has been called. */
    protected SortedSet sorted; // sorted Freq

    /** @deprecated since 1.1, use attributes instead */
    private final String attribute;

    /** @since 0.8
     * @deprecated since 1.1, use attributes instead */
    private final String attribute2;

    /** @since 0.8
     * @deprecated since 1.1, use attributes instead */
    private final String attribute3;

    /** @deprecated since 1.1, use attributes instead */
    private transient LogType.AttributeInfo attributeInfo;

    /** @deprecated since 1.1, use attributes instead */
    private transient LogType.AttributeInfo attributeInfo2;

    /** @deprecated since 1.1, use attributes instead */
    private transient LogType.AttributeInfo attributeInfo3;

    private final int sortItem; // index in SORT_ITEM_VALUES

    private final boolean reverseOrder;

    private final Properties valueDescriptions = new Properties();

    private final int maxGlobalReport;

    private final int maxGroupReport;

    /** @deprecated since 1.1, use attributes instead */
    private final String regexpString;

    /** @deprecated since 1.1, use attributes instead */
    private transient Pattern regexp;

    /** @deprecated since 1.1, use attributes instead */
    private final int regexpGroup;

    /** @since 1.1 */
    protected final String[] attributes;

    /** Lazily resolved on the first log event (transient: resolved again after deserialization). */
    protected transient LogType.AttributeInfo[] attributesInfo;

    /**
     * Creates the plugin from its definition, with the check on obsolete <code>sampling.*</code> parameters
     * enabled.
     *
     * @param definition the plugin definition holding the parameters
     */
    public FreqPlugin( LogDistiller.Plugin definition )
    {
        this( definition, true );
    }

    /**
     * Creates the plugin from its definition.
     *
     * @param definition the plugin definition holding the parameters
     * @param checkSampling if <code>true</code>, a warning is registered when one of the
     *            <code>sampling.maxCount</code>, <code>sampling.maxSize</code> or <code>sampling.filename</code>
     *            parameters is set
     * @throws PluginConfigException if the <code>regexp</code> parameter is not a valid pattern or the
     *             <code>sortItem</code> parameter is not one of the supported values
     */
    protected FreqPlugin( LogDistiller.Plugin definition, boolean checkSampling )
    {
        super( definition );
        attribute = definition.getParam( "attribute" );
        attribute2 = definition.getParam( "attribute2", null );
        attribute3 = definition.getParam( "attribute3", null );
        regexpString = definition.getParam( "regexp" );
        updatePattern();
        regexpGroup = Integer.parseInt( definition.getParam( "regexp.group", "-1" ) );
        String attrs = definition.getParam( "attributes" );
        attributes = ( attrs == null ) ? null : StringUtils.split( attrs, ", " );

        // fail at configuration time on an unknown sortItem, instead of storing a negative index that would only
        // blow up later when the comparator or description arrays are indexed
        sortItem = parseSortItem( definition.getParam( "sortItem" ) );
        String attr = definition.getParam( "reverseOrder", "false" );
        reverseOrder = Boolean.valueOf( attr ).booleanValue();
        attr = definition.getParam( "valueDescriptions" );
        if ( attr != null )
        {
            try
            {
                valueDescriptions.load( new ByteArrayInputStream( attr.getBytes( "ISO-8859-1" ) ) );
            }
            catch ( IOException ioe )
            {
                // best effort: descriptions are only cosmetic, the plugin keeps working with the raw values
                ioe.printStackTrace();
            }
        }
        maxGlobalReport = Integer.parseInt( definition.getParam( "maxGlobalReport", "5" ) );
        maxGroupReport = Integer.parseInt( definition.getParam( "maxGroupReport", "25" ) );

        if ( checkSampling
            && ( ( definition.getParam( "sampling.maxCount" ) != null )
                || ( definition.getParam( "sampling.maxSize" ) != null )
                || ( definition.getParam( "sampling.filename" ) != null ) ) )
        {
            LogDistiller ld = definition.getGroup().getLogdistiller();
            ld.addWarning( "sampling features (sampling.* parameters) disabled from freq plugin: "
                + "moved to sampling plugin in LogDistiller 0.9" );
        }

        if ( attribute != null )
        {
            LogDistiller ld = definition.getGroup().getLogdistiller();
            ld.addWarning( "'attribute' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use 'attributes'" );
            if ( regexp != null )
            {
                ld.addWarning( "'regexp' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use extended attributes" );
            }
        }
    }

    /**
     * Translates the <code>sortItem</code> parameter into an index in <code>SORT_ITEM_VALUES</code>.
     *
     * @param value the parameter value, <code>null</code> if the parameter is not set
     * @return 0 (sort on count) when the parameter is not set, the index of the matching value otherwise
     * @throws PluginConfigException if the value is not one of the supported sort items
     */
    private static int parseSortItem( String value )
    {
        if ( value == null )
        {
            return 0;
        }
        for ( int i = 0; i < SORT_ITEM_VALUES.length; i++ )
        {
            if ( SORT_ITEM_VALUES[i].equals( value ) )
            {
                return i;
            }
        }
        throw new PluginConfigException( "sortItem parameter contains unknown value '" + value + "': expected one of "
            + StringUtils.join( SORT_ITEM_VALUES, ", " ) );
    }

    /**
     * Returns the given String, or an empty String when it is <code>null</code>.
     */
    private static String nullToEmpty( String value )
    {
        return ( value == null ) ? "" : value;
    }

    /**
     * Restores the transient compiled pattern after default deserialization.
     */
    private void readObject( ObjectInputStream in )
        throws IOException, ClassNotFoundException
    {
        in.defaultReadObject();
        updatePattern();
    }

    /**
     * Compiles <code>regexpString</code> into <code>regexp</code> (<code>null</code> when no regexp is configured).
     *
     * @throws PluginConfigException if the pattern syntax is invalid
     * @deprecated since 1.1, use attributes instead
     */
    private void updatePattern()
    {
        try
        {
            regexp = ( regexpString == null ) ? null : Pattern.compile( regexpString );
        }
        catch ( PatternSyntaxException pse )
        {
            throw new PluginConfigException( "regexp parameter contains bad pattern '" + regexpString
                + "': caused an exception " + pse.getMessage() );
        }
    }

    /**
     * Nothing to prepare for this plugin: the destination directory is not used here.
     */
    public void begin( File destinationDirectory )
        throws FileNotFoundException
    {
    }

    /**
     * Accounts the log event in the Freq matching its attributes' values, creating the Freq on first occurrence.
     *
     * @param logEvent the log event to count
     */
    public void addLogEvent( LogEvent logEvent )
        throws IOException
    {
        String[] values = extractAttributes( logEvent );

        String key = Freq.asKey( values );
        Freq freq = (Freq) freqs.get( key );
        if ( freq == null )
        {
            freq = new Freq( key, values );
            freqs.put( key, freq );
        }

        addLogEventToFreq( logEvent, freq );
    }

    /**
     * Extracts the values to count from the log event: one value per configured attribute, or a single value
     * computed the deprecated way when <code>attributes</code> is not configured.
     */
    private String[] extractAttributes( LogEvent logEvent )
    {
        if ( attributes == null )
        {
            return new String[]{ oldComputeValue( logEvent ) };
        }

        // initialize attributesInfo if necessary
        if ( attributesInfo == null )
        {
            attributesInfo = new LogType.AttributeInfo[attributes.length];
            LogType.Description description = logEvent.getFactory().getDescription();

            for ( int i = 0; i < attributes.length; i++ )
            {
                attributesInfo[i] = description.getAttributeInfo( attributes[i] );
            }
        }

        String[] values = new String[attributesInfo.length];
        for ( int i = 0; i < attributesInfo.length; i++ )
        {
            values[i] = logEvent.getValue( attributesInfo[i] );
        }
        return values;
    }

    /**
     * Computes the value from <code>attribute</code>, <code>attribute2</code>, <code>attribute3</code> (joined with
     * '-') and the optional regexp. When a regexp is configured, the result is never <code>null</code>: it is the
     * empty String if the regexp does not match or the requested group is missing.
     *
     * @deprecated since 1.1, use attributes instead
     */
    private String oldComputeValue( LogEvent logEvent )
    {
        // initialize attributeInfo if necessary
        if ( attributeInfo == null )
        {
            LogType.Description description = logEvent.getFactory().getDescription();
            attributeInfo = description.getAttributeInfo( attribute );
            if ( StringUtils.isNotEmpty( attribute2 ) )
            {
                attributeInfo2 = description.getAttributeInfo( attribute2 );
                if ( StringUtils.isNotEmpty( attribute3 ) )
                {
                    attributeInfo3 = description.getAttributeInfo( attribute3 );
                }
            }
        }

        // calculate freq value
        String value = logEvent.getValue( attributeInfo );
        if ( attributeInfo2 != null )
        {
            value += '-' + logEvent.getValue( attributeInfo2 );
            if ( attributeInfo3 != null )
            {
                value += '-' + logEvent.getValue( attributeInfo3 );
            }
        }
        if ( regexp != null )
        {
            Matcher matcher = regexp.matcher( value );
            if ( matcher.find() && ( regexpGroup <= matcher.groupCount() ) )
            {
                if ( regexpGroup >= 0 )
                {
                    value = matcher.group( regexpGroup );
                }
                else if ( matcher.groupCount() > 1 )
                {
                    // a group that did not take part in the match is null: count it as an empty String, like the
                    // other branches do, instead of failing or inserting the text "null"
                    StringBuffer buff = new StringBuffer( nullToEmpty( matcher.group( 1 ) ) );
                    for ( int i = 2; i <= matcher.groupCount(); i++ )
                    {
                        buff.append( '-' ).append( nullToEmpty( matcher.group( i ) ) );
                    }
                    value = buff.toString();
                }
                else if ( matcher.groupCount() == 1 )
                {
                    value = matcher.group( 1 );
                }
                else
                {
                    value = matcher.group( 0 );
                }
            }
            else
            {
                value = "";
            }
            if ( value == null )
            {
                value = "";
            }
        }

        return value;
    }

    /**
     * Updates the Freq counters for one more log event: count is incremented and bytes is increased by the raw
     * log length plus the line separator length.
     *
     * @since 1.1
     */
    protected void addLogEventToFreq( LogEvent logEvent, Freq freq )
        throws IOException
    {
        // increase freq count
        freq.count++;
        int bytes = logEvent.getRawLog().length() + LogDistillation.LINE_SEPARATOR.length();
        freq.bytes += bytes;
    }

    /**
     * Sorts the collected Freq instances according to the <code>sortItem</code> and <code>reverseOrder</code>
     * parameters.
     */
    public void end()
        throws IOException
    {
        FreqOrder comparator = SORT_ITEM_COMPARATORS[sortItem];
        comparator = reverseOrder ? new ReverseOrder( comparator ) : comparator;
        sorted = new TreeSet( comparator );
        sorted.addAll( freqs.values() );
    }

    /**
     * Appends the report limited to <code>maxGroupReport</code> values.
     */
    public void appendGroupReport( ReportFormat.PluginReport report )
    {
        appendReport( report, maxGroupReport );
    }

    /**
     * Appends the report limited to <code>maxGlobalReport</code> values.
     */
    public void appendGlobalReport( ReportFormat.PluginReport report )
    {
        appendReport( report, maxGlobalReport );
    }

    /**
     * @return the sorted Freq instances, <code>null</code> as long as {@link #end()} has not been called
     */
    SortedSet getSorted()
    {
        return sorted;
    }

    /**
     * Builds the report title: counted attributes, sort order and number of values displayed.
     *
     * @param maxCount maximum number of values displayed, negative for no limit
     */
    private String describeResult( int maxCount )
    {
        String count =
            ( ( maxCount < 0 ) || ( maxCount >= sorted.size() ) ) ? String.valueOf( sorted.size() )
                            : ( maxCount + "/" + sorted.size() );
        String attrs;
        if ( attributes == null )
        {
            attrs = attribute
                + ( ( attribute2 == null ) ? "" :
                    ( "'-'" + attribute2 + ( ( attribute3 == null ) ? "" : ( "'-'" + attribute3 ) ) ) );
        }
        else
        {
            attrs = StringUtils.join( attributes, "'-'" );
        }
        return "values of " + ( regexpString == null ? "" : "regexp on " ) + "attribute '" + attrs + "'"
            + " sorted by" + ( reverseOrder ? " reverse " : " " ) + SORT_ITEM_DESCRIPTIONS[sortItem] + " (" + count
            + " different values)";
    }

    /**
     * Adds the first sorted values to the report, replacing a value by its description when one is configured.
     *
     * @param maxCount maximum number of values added, negative for no limit
     */
    private void addItems( ReportFormat.PluginReport report, int maxCount )
    {
        Iterator iter = sorted.iterator();
        while ( iter.hasNext() && ( ( maxCount < 0 ) || ( maxCount-- > 0 ) ) )
        {
            Freq freq = (Freq) iter.next();

            // value displayed in the report
            String value = ( freq.values == null ) ? freq.key : StringUtils.join( freq.values, '-' );

            report.addItem( freq.count, valueDescriptions.getProperty( value, value ) );
        }
    }

    /**
     * Writes the complete plugin report: title, optional links (see {@link #appendLinkToFreqReport}), then items.
     */
    private void appendReport( ReportFormat.PluginReport report, int maxCount )
    {
        report.beginPluginReport( this, describeResult( maxCount ) );
        appendLinkToFreqReport( report );
        addItems( report, maxCount );
        report.endPluginReport();
    }

    /**
     * Append links to freq reports, for freq extensions.
     *
     * @param report the report formatter
     */
    protected void appendLinkToFreqReport( ReportFormat.PluginReport report )
    {
        // no additional link for freq: it's intended for subclasses
    }

    /**
     * Counters for one distinct combination of attributes' values.
     */
    public static class Freq
        implements Serializable
    {
        private static final long serialVersionUID = 9195786567021952968L;

        public final String key;

        public final String[] values;

        public int count = 0;

        public long bytes = 0;

        /**
         * @param key the key, expected to be the result of {@link #asKey(String[])} on <code>values</code>
         * @param values the attributes' values; the array content is replaced by substrings of the key
         */
        public Freq( String key, String[] values )
        {
            this.key = key;
            this.values = values;

            // get values Strings as substring from key, to avoid keeping a pointer to the initial log String
            int index = 0;
            for ( int i = 0; i < values.length; i++ )
            {
                int start = index;
                // a null value takes no room in the key (joined as an empty String): keep it as an empty String
                // instead of failing on it
                index += ( values[i] == null ) ? 0 : values[i].length();
                values[i] = key.substring( start, index );
                index++; // skip the separator
            }
        }

        /**
         * Builds the map key for the given values: values joined with a newline character.
         */
        public static String asKey( String[] values )
        {
            return StringUtils.join( values, '\n' );
        }
    }

    /**
     * Base type for the serializable comparators used to sort Freq instances.
     */
    private abstract static class FreqOrder
        implements Comparator, Serializable
    {
    }

    /**
     * Order on freq count, then on key when count are equal.
     */
    private static class CountOrder
        extends FreqOrder
    {
        private static final long serialVersionUID = -933179294932120994L;

        public int compare( Object o1, Object o2 )
        {
            Freq freq1 = (Freq) o1;
            Freq freq2 = (Freq) o2;
            // highest count first; the subtraction cannot overflow as long as counts are not negative
            int diff = freq2.count - freq1.count;
            return ( diff == 0 ) ? freq1.key.compareTo( freq2.key ) : diff;
        }
    }

    /**
     * Order on key (lexical order).
     */
    private static class ValueOrder
        extends FreqOrder
    {
        private static final long serialVersionUID = 1824757961838977443L;

        public int compare( Object o1, Object o2 )
        {
            Freq freq1 = (Freq) o1;
            Freq freq2 = (Freq) o2;
            return freq1.key.compareTo( freq2.key );
        }
    }

    /**
     * Order on key length, then key lexical order.
     * example: 1234 > 234 > 233 > A5 > 45
     */
    private static class ValueLengthOrder
        extends FreqOrder
    {
        private static final long serialVersionUID = 651766724239811116L;

        public int compare( Object o1, Object o2 )
        {
            Freq freq1 = (Freq) o1;
            Freq freq2 = (Freq) o2;
            int diff = freq2.key.length() - freq1.key.length();
            return ( diff == 0 ) ? freq2.key.compareTo( freq1.key ) : diff;
        }
    }

    /**
     * Reverses the order defined by another comparator.
     */
    private static class ReverseOrder
        extends FreqOrder
    {
        private static final long serialVersionUID = 539304109704333616L;

        private final FreqOrder comparator;

        public ReverseOrder( FreqOrder comparator )
        {
            this.comparator = comparator;
        }

        public int compare( Object o1, Object o2 )
        {
            return comparator.compare( o2, o1 );
        }
    }

    /**
     * Plugin type registered under the {@link FreqPlugin#ID} id, creating FreqPlugin instances.
     */
    private static class Type
        extends Plugin
    {
        public Type()
        {
            super( ID );
        }

        public LogDistillation.Plugin newInstance( LogDistiller.Plugin conf )
        {
            return new FreqPlugin( conf );
        }
    }
}