package net.sf.logdistiller.plugins;

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.commons.lang.StringUtils;

import net.sf.logdistiller.LogDistillation;
import net.sf.logdistiller.LogDistiller;
import net.sf.logdistiller.LogEvent;
import net.sf.logdistiller.LogType;
import net.sf.logdistiller.Plugin;
import net.sf.logdistiller.PluginConfigException;
import net.sf.logdistiller.ReportFormat;

/**
 * Counts the frequency of each attribute's value.
 * <p>
 * Parameters:
 * </p>
 * <ul>
 * <li><b>value</b> (deprecated since 1.1): definition of the value that will be taken into account
 * <ul>
 * <li><code>attribute</code> (mandatory): the attribute's name
 * <li><code>regexp</code>: use a regexp to extract value taken in account for the first attribute
 * <li><code>regexp.group</code> (default: -1): which group to extract? If negative, all groups will be concatenated
 * <li><code>attribute2</code> (default: none): second attribute to append
 * <li><code>attribute3</code> (default: none): third attribute to append
 * </ul>
 * </li>
 * <li><b>value</b>: definition of the value that will be taken into account
 * <ul>
 * <li><code>attributes</code> (mandatory): the attributes' names, comma separated (no count limit)
 * </li>
 * </ul>
 * <li><b>sorting options</b>
 * <ul>
 * <li><code>sortItem</code> (default: <code>count</code>): sort on which item? <code>count</code>, <code>value</code>,
 * <code>valueLength</code>
 * <li><code>reverseOrder</code> (default: <code>false</code>): by default, most frequent values are displayed first
 * </ul>
 * </li>
 * <li><b>reporting</b>
 * <ul>
 * <li><code>valueDescriptions</code>: description to insert in the report instead of corresponding value (in java
 * properties format)
 * <li><code>maxGlobalReport</code> (default: 5): maximum number of values inserted in global report
 * <li><code>maxGroupReport</code> (default: 25): maximum number of values inserted in group report
 * </ul>
 * </li>
 * </ul>
 *
 * @see SamplingPlugin
 * @since 0.6
 */
public class FreqPlugin
    extends LogDistillation.Plugin
{
    private static final long serialVersionUID = -5162563816094662936L;

    public final static String ID = "freq";

    public final static Plugin TYPE = new Type();

    /** Accepted values of the <code>sortItem</code> parameter; indexes match the two arrays below. */
    private final static String[] SORT_ITEM_VALUES = { "count", "value", "valueLength" };

    private final static String[] SORT_ITEM_DESCRIPTIONS = { "count", "attribute's value", "attribute's value length" };

    private final static FreqOrder[] SORT_ITEM_COMPARATORS =
        { new CountOrder(), new ValueOrder(), new ValueLengthOrder() };

    /** Frequencies collected so far, indexed by {@link Freq#key}. */
    private final Map<String, Freq> freqs = new HashMap<String, Freq>();

    protected SortedSet<Freq> sorted; // sorted Freq, filled by end()

    /** @deprecated since 1.1, use attributes instead */
    private final String attribute;

    /** @since 0.8
     * @deprecated since 1.1, use attributes instead */
    private final String attribute2;

    /** @since 0.8
     * @deprecated since 1.1, use attributes instead */
    private final String attribute3;

    /** @deprecated since 1.1, use attributes instead */
    private transient LogType.AttributeInfo attributeInfo;

    /** @deprecated since 1.1, use attributes instead */
    private transient LogType.AttributeInfo attributeInfo2;

    /** @deprecated since 1.1, use attributes instead */
    private transient LogType.AttributeInfo attributeInfo3;

    private final int sortItem; // index in SORT_ITEM_VALUES

    private final boolean reverseOrder;

    private final Properties valueDescriptions = new Properties();

    private final int maxGlobalReport;

    private final int maxGroupReport;

    /** @deprecated since 1.1, use attributes instead */
    private final String regexpString;

    /** @deprecated since 1.1, use attributes instead */
    private transient Pattern regexp;

    /** @deprecated since 1.1, use attributes instead */
    private final int regexpGroup;

    /** @since 1.1 */
    protected final String[] attributes;

    protected transient LogType.AttributeInfo[] attributesInfo;

    /**
     * Creates a freq plugin from its configuration, warning about obsolete <code>sampling.*</code> parameters.
     *
     * @param definition the plugin configuration
     */
    public FreqPlugin( LogDistiller.Plugin definition )
    {
        this( definition, true );
    }

    /**
     * Creates a freq plugin from its configuration.
     *
     * @param definition the plugin configuration
     * @param checkSampling if <code>true</code>, a warning is emitted when one of the <code>sampling.maxCount</code>,
     *            <code>sampling.maxSize</code> or <code>sampling.filename</code> parameters is set
     * @throws PluginConfigException if the <code>regexp</code> parameter is not a valid pattern
     * @throws NumberFormatException if <code>regexp.group</code>, <code>maxGlobalReport</code> or
     *             <code>maxGroupReport</code> is not an integer
     */
    protected FreqPlugin( LogDistiller.Plugin definition, boolean checkSampling )
    {
        super( definition );
        LogDistiller.Group group = definition.getGroup();
        String context = "(logdistiller/group[id='" + ( ( group == null ) ? "default-id" : group.getId() )
            + "']/plugin[type='" + definition.getType() + "'])";

        attribute = definition.getParam( "attribute" );
        attribute2 = definition.getParam( "attribute2", null );
        attribute3 = definition.getParam( "attribute3", null );
        regexpString = definition.getParam( "regexp" );
        updatePattern();
        regexpGroup = Integer.parseInt( definition.getParam( "regexp.group", "-1" ) );
        String attrs = definition.getParam( "attributes" );
        // both comma and space are separator characters
        attributes = ( attrs == null ) ? null : StringUtils.split( attrs, ", " );

        String attr = definition.getParam( "sortItem" );
        // plain lookup: does not depend on SORT_ITEM_VALUES being kept in sorted order
        int sortItem_ = ( attr == null ) ? 0 : Arrays.asList( SORT_ITEM_VALUES ).indexOf( attr );
        if ( sortItem_ < 0 )
        {
            warn( group, context + " unsupported '" + attr
                + "' sortItem value: choose one of [ 'count', 'value', 'valueLength' ], defaulting to 'count'" );
            sortItem_ = 0;
        }
        sortItem = sortItem_;

        attr = definition.getParam( "reverseOrder", "false" );
        reverseOrder = Boolean.valueOf( attr ).booleanValue();
        attr = definition.getParam( "valueDescriptions" );
        if ( attr != null )
        {
            try
            {
                valueDescriptions.load( new ByteArrayInputStream( attr.getBytes( "ISO-8859-1" ) ) );
            }
            catch ( IOException ioe )
            {
                ioe.printStackTrace();
            }
        }
        maxGlobalReport = Integer.parseInt( definition.getParam( "maxGlobalReport", "5" ) );
        maxGroupReport = Integer.parseInt( definition.getParam( "maxGroupReport", "25" ) );

        if ( checkSampling
            && ( ( definition.getParam( "sampling.maxCount" ) != null )
                || ( definition.getParam( "sampling.maxSize" ) != null )
                || ( definition.getParam( "sampling.filename" ) != null ) ) )
        {
            warn( group, context + " sampling features (sampling.* parameters) disabled from freq plugin: "
                + "moved to sampling plugin since LogDistiller 0.9" );
        }

        if ( attribute != null )
        {
            warn( group, context + " 'attribute' parameter deprecated in " + getId()
                + " plugin since LogDistiller 1.1: use 'attributes'" );
            if ( regexp != null )
            {
                warn( group, context + " 'regexp' parameter deprecated in " + getId()
                    + " plugin since LogDistiller 1.1: use extended attributes" );
            }
        }
    }

    /**
     * Reports a configuration warning. The warning is registered on the group's LogDistiller; when the plugin
     * definition has no group, there is nothing to register it on, so it is printed on standard error instead of
     * failing with a <code>NullPointerException</code>.
     *
     * @param group the group of the plugin definition, may be <code>null</code>
     * @param message the warning message
     */
    private static void warn( LogDistiller.Group group, String message )
    {
        if ( group == null )
        {
            System.err.println( "WARNING: " + message );
        }
        else
        {
            group.getLogdistiller().addWarning( message );
        }
    }

    /**
     * Restores the transient compiled pattern after default deserialization.
     */
    private void readObject( ObjectInputStream in )
        throws IOException, ClassNotFoundException
    {
        in.defaultReadObject();
        updatePattern();
    }

    /**
     * (Re)compiles {@link #regexp} from {@link #regexpString}.
     *
     * @throws PluginConfigException if the pattern is invalid
     * @deprecated since 1.1, use attributes instead
     */
    private void updatePattern()
    {
        try
        {
            regexp = ( regexpString == null ) ? null : Pattern.compile( regexpString );
        }
        catch ( PatternSyntaxException pse )
        {
            throw new PluginConfigException( "regexp parameter contains bad pattern '" + regexpString
                + "': caused an exception " + pse.getMessage() );
        }
    }

    /**
     * Nothing to prepare: this plugin does not use the destination directory.
     */
    public void begin( File destinationDirectory )
        throws FileNotFoundException
    {
    }

    /**
     * Extracts the configured value(s) from the log event and updates the corresponding {@link Freq}, creating it
     * on first occurrence.
     */
    public void addLogEvent( LogEvent logEvent )
        throws IOException
    {
        String[] values = extractAttributes( logEvent );

        String key = Freq.asKey( values );
        Freq freq = freqs.get( key );
        if ( freq == null )
        {
            freq = new Freq( key, values );
            freqs.put( key, freq );
        }

        addLogEventToFreq( logEvent, freq );
    }

    /**
     * Reads the values of the configured <code>attributes</code> from the log event, or falls back to the
     * deprecated single computed value when <code>attributes</code> is not configured.
     */
    private String[] extractAttributes( LogEvent logEvent )
    {
        if ( attributes == null )
        {
            return new String[] { oldComputeValue( logEvent ) };
        }

        // initialize attributesInfo if necessary (transient, so also after deserialization)
        if ( attributesInfo == null )
        {
            attributesInfo = new LogType.AttributeInfo[attributes.length];
            LogType.Description description = logEvent.getFactory().getDescription();

            for ( int i = 0; i < attributes.length; i++ )
            {
                attributesInfo[i] = description.getAttributeInfo( attributes[i] );
            }
        }

        String[] values = new String[attributesInfo.length];
        for ( int i = 0; i < attributesInfo.length; i++ )
        {
            values[i] = logEvent.getValue( attributesInfo[i] );
        }
        return values;
    }

    /**
     * Computes the value from <code>attribute</code>, <code>attribute2</code> and <code>attribute3</code>
     * (joined with '-'), then optionally applies the regexp extraction.
     *
     * @deprecated since 1.1, use attributes instead
     */
    private String oldComputeValue( LogEvent logEvent )
    {
        // initialize attributeInfo if necessary
        if ( attributeInfo == null )
        {
            LogType.Description description = logEvent.getFactory().getDescription();
            attributeInfo = description.getAttributeInfo( attribute );
            if ( StringUtils.isNotEmpty( attribute2 ) )
            {
                attributeInfo2 = description.getAttributeInfo( attribute2 );
                if ( StringUtils.isNotEmpty( attribute3 ) )
                {
                    attributeInfo3 = description.getAttributeInfo( attribute3 );
                }
            }
        }

        // calculate freq value
        String value = logEvent.getValue( attributeInfo );
        if ( attributeInfo2 != null )
        {
            value += '-' + logEvent.getValue( attributeInfo2 );
            if ( attributeInfo3 != null )
            {
                value += '-' + logEvent.getValue( attributeInfo3 );
            }
        }
        if ( regexp != null )
        {
            Matcher matcher = regexp.matcher( value );
            if ( matcher.find() && ( regexpGroup <= matcher.groupCount() ) )
            {
                if ( regexpGroup >= 0 )
                {
                    value = matcher.group( regexpGroup );
                }
                else if ( matcher.groupCount() > 1 )
                {
                    // negative group index: concatenate every capturing group
                    StringBuilder buff = new StringBuilder( matcher.group( 1 ) );
                    for ( int i = 2; i <= matcher.groupCount(); i++ )
                    {
                        buff.append( '-' ).append( matcher.group( i ) );
                    }
                    value = buff.toString();
                }
                else if ( matcher.groupCount() == 1 )
                {
                    value = matcher.group( 1 );
                }
                else
                {
                    value = matcher.group( 0 );
                }
            }
            else
            {
                value = "";
            }
            if ( value == null )
            {
                // the selected group did not participate in the match
                value = "";
            }
        }

        return value;
    }

    /**
     * Accounts one more log event in the given freq: increments its count and adds the raw log length plus the
     * line separator length to its byte total.
     *
     * @since 1.1
     */
    protected void addLogEventToFreq( LogEvent logEvent, Freq freq )
        throws IOException
    {
        // increase freq count
        freq.count++;
        int bytes = logEvent.getRawLog().length() + LogDistillation.LINE_SEPARATOR.length();
        freq.bytes += bytes;
    }

    /**
     * Sorts the collected frequencies according to <code>sortItem</code> and <code>reverseOrder</code>.
     */
    public void end()
        throws IOException
    {
        FreqOrder comparator = SORT_ITEM_COMPARATORS[sortItem];
        comparator = reverseOrder ? new ReverseOrder( comparator ) : comparator;
        sorted = new TreeSet<Freq>( comparator );
        sorted.addAll( freqs.values() );
    }

    public void appendGroupReport( ReportFormat.PluginReport report )
    {
        appendReport( report, maxGroupReport );
    }

    public void appendGlobalReport( ReportFormat.PluginReport report )
    {
        appendReport( report, maxGlobalReport );
    }

    SortedSet<Freq> getSorted()
    {
        return sorted;
    }

    /**
     * Builds the report title.
     *
     * @param maxCount maximum number of values reported, negative meaning no limit
     */
    private String describeResult( int maxCount )
    {
        String count =
            ( ( maxCount < 0 ) || ( maxCount >= sorted.size() ) ) ? String.valueOf( sorted.size() )
                            : ( maxCount + "/" + sorted.size() );
        String attrs;
        if ( attributes == null )
        {
            attrs = attribute
                + ( ( attribute2 == null ) ? ""
                                : ( "'-'" + attribute2 + ( ( attribute3 == null ) ? "" : ( "'-'" + attribute3 ) ) ) );
        }
        else
        {
            attrs = StringUtils.join( attributes, "'-'" );
        }
        return "values of " + ( regexpString == null ? "" : "regexp on " ) + "attribute '" + attrs + "'"
            + " sorted by" + ( reverseOrder ? " reverse " : " " ) + SORT_ITEM_DESCRIPTIONS[sortItem] + " (" + count
            + " different values)";
    }

    /**
     * Adds the first sorted freqs to the report.
     *
     * @param maxCount maximum number of items to add, negative meaning no limit
     */
    private void addItems( ReportFormat.PluginReport report, int maxCount )
    {
        Iterator<Freq> iter = sorted.iterator();
        while ( iter.hasNext() && ( ( maxCount < 0 ) || ( maxCount-- > 0 ) ) )
        {
            Freq freq = iter.next();

            // value displayed in the report
            String value = ( freq.values == null ) ? freq.key : StringUtils.join( freq.values, '-' );

            report.addItem( freq.count, valueDescriptions.getProperty( value, value ) );
        }
    }

    private void appendReport( ReportFormat.PluginReport report, int maxCount )
    {
        report.beginPluginReport( this, describeResult( maxCount ) );
        appendLinkToFreqReport( report );
        addItems( report, maxCount );
        report.endPluginReport();
    }

    /**
     * Append links to freq reports, for freq extensions.
     *
     * @param report the report formatter
     */
    protected void appendLinkToFreqReport( ReportFormat.PluginReport report )
    {
        // no additional link for freq: it's intended for subclasses
    }

    /**
     * Occurrence count and cumulated size for one distinct value (or tuple of values).
     */
    public static class Freq
        implements Serializable
    {
        private static final long serialVersionUID = 9195786567021952968L;

        public final String key;

        public final String[] values;

        public int count = 0;

        public long bytes = 0;

        /**
         * @param key the key, expected to be <code>asKey( values )</code>
         * @param values the values; may be <code>null</code>. The array is kept as is and its elements are replaced
         *            in place by substrings of <code>key</code> (a <code>null</code> element becomes an empty
         *            string, which is how {@link #asKey(String[])} renders it in the key)
         */
        public Freq( String key, String[] values )
        {
            this.key = key;
            this.values = values;

            if ( values == null )
            {
                return;
            }

            // get values Strings as substring from key, to avoid keeping a pointer to the initial log String
            int index = 0;
            for ( int i = 0; i < values.length; i++ )
            {
                int start = index;
                index += ( values[i] == null ) ? 0 : values[i].length();
                values[i] = key.substring( start, index );
                index++; // skip the separator
            }
        }

        /**
         * Joins the values with a newline separator to build the key identifying a freq.
         */
        public static String asKey( String[] values )
        {
            return StringUtils.join( values, '\n' );
        }
    }

    /**
     * Base class of the serializable comparators used to sort freqs.
     */
    private abstract static class FreqOrder
        implements Comparator<Freq>, Serializable
    {
        private static final long serialVersionUID = -2574387085671510158L;
    }

    /**
     * Order on freq count (highest first), then on key when count are equal.
     */
    private static class CountOrder
        extends FreqOrder
    {
        private static final long serialVersionUID = -933179294932120994L;

        public int compare( Freq freq1, Freq freq2 )
        {
            // explicit comparison rather than subtraction, which could overflow
            if ( freq1.count != freq2.count )
            {
                return ( freq1.count < freq2.count ) ? 1 : -1;
            }
            return freq1.key.compareTo( freq2.key );
        }
    }

    /**
     * Order on key (lexical order).
     */
    private static class ValueOrder
        extends FreqOrder
    {
        private static final long serialVersionUID = 1824757961838977443L;

        public int compare( Freq freq1, Freq freq2 )
        {
            return freq1.key.compareTo( freq2.key );
        }
    }

    /**
     * Order on key length, then key lexical order.
     * example: 1234 > 234 > 233 > A5 > 45
     */
    private static class ValueLengthOrder
        extends FreqOrder
    {
        private static final long serialVersionUID = 651766724239811116L;

        public int compare( Freq freq1, Freq freq2 )
        {
            // lengths are never negative, so the difference cannot overflow
            int diff = freq2.key.length() - freq1.key.length();
            return ( diff == 0 ) ? freq2.key.compareTo( freq1.key ) : diff;
        }
    }

    /**
     * Reverses the order defined by another comparator.
     */
    private static class ReverseOrder
        extends FreqOrder
    {
        private static final long serialVersionUID = 539304109704333616L;

        private final FreqOrder comparator;

        public ReverseOrder( FreqOrder comparator )
        {
            this.comparator = comparator;
        }

        public int compare( Freq freq1, Freq freq2 )
        {
            return comparator.compare( freq2, freq1 );
        }
    }

    /**
     * Plugin type registered under {@link FreqPlugin#ID}, creating {@link FreqPlugin} instances.
     */
    private static class Type
        extends Plugin
    {
        public Type()
        {
            super( ID );
        }

        public LogDistillation.Plugin newInstance( LogDistiller.Plugin conf )
        {
            return new FreqPlugin( conf );
        }
    }
}