001 package net.sf.logdistiller.plugins;
002
003 /*
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017 import java.io.*;
018 import java.util.*;
019 import java.util.regex.Matcher;
020 import java.util.regex.Pattern;
021 import java.util.regex.PatternSyntaxException;
022
023 import org.apache.commons.lang.StringUtils;
024
025 import net.sf.logdistiller.LogDistillation;
026 import net.sf.logdistiller.LogDistiller;
027 import net.sf.logdistiller.LogEvent;
028 import net.sf.logdistiller.LogType;
029 import net.sf.logdistiller.Plugin;
030 import net.sf.logdistiller.PluginConfigException;
031 import net.sf.logdistiller.ReportFormat;
032
033 /**
034 * Counts the frequency of each attribute's value.
035 * <p>
036 * Parameters:
037 * </p>
038 * <ul>
039 * <li><b>value</b> (deprecated since 1.1): definition of the value that will be taken into account
040 * <ul>
041 * <li><code>attribute</code> (mandatory): the attribute's name
 * <li><code>regexp</code>: use a regexp to extract the value taken into account for the first attribute
043 * <li><code>regexp.group</code> (default: -1): which group to extract? If negative, all groups will be concatenated
044 * <li><code>attribute2</code> (default: none): second attribute to append
045 * <li><code>attribute3</code> (default: none): third attribute to append
046 * </ul>
047 * </li>
048 * <li><b>value</b>: definition of the value that will be taken into account
049 * <ul>
050 * <li><code>attributes</code> (mandatory): the attributes' names, comma separated (no count limit)
051 * </li>
052 * </ul>
053 * <li><b>sorting options</b>
054 * <ul>
055 * <li><code>sortItem</code> (default: <code>count</code>): sort on which item? <code>count</code>, <code>value</code>,
056 * <code>valueLength</code>
 * <li><code>reverseOrder</code> (default: <code>false</code>): by default, most frequent values are displayed first
058 * </ul>
059 * </li>
060 * <li><b>reporting</b>
061 * <ul>
062 * <li><code>valueDescriptions</code>: description to insert in the report instead of corresponding value (in java
063 * properties format)
064 * <li><code>maxGlobalReport</code> (default: 5): maximum number of values inserted in global report
065 * <li><code>maxGroupReport</code> (default: 25): maximum number of values inserted in group report
066 * </ul>
067 * </li>
068 * </ul>
069 *
070 * @see SamplingPlugin
071 * @since 0.6
072 */
073 public class FreqPlugin
074 extends LogDistillation.Plugin
075 {
076 private static final long serialVersionUID = -5162563816094662936L;
077
078 public final static String ID = "freq";
079
080 public final static Plugin TYPE = new Type();
081
082 private final static String[] SORT_ITEM_VALUES = { "count", "value", "valueLength" };
083
084 private final static String[] SORT_ITEM_DESCRIPTIONS = { "count", "attribute's value", "attribute's value length" };
085
086 private final static FreqOrder[] SORT_ITEM_COMPARATORS =
087 { new CountOrder(), new ValueOrder(), new ValueLengthOrder() };
088
089 private final Map freqs = new HashMap(); // key = attribute's value (String), value = Freq instance
090
091 protected SortedSet sorted; // sorted Freq
092
093 /** @deprecated since 1.1, use attributes instead */
094 private final String attribute;
095
096 /** @since 0.8
097 * @deprecated since 1.1, use attributes instead */
098 private final String attribute2;
099
100 /** @since 0.8
101 * @deprecated since 1.1, use attributes instead */
102 private final String attribute3;
103
104 /** @deprecated since 1.1, use attributes instead */
105 private transient LogType.AttributeInfo attributeInfo;
106
107 /** @deprecated since 1.1, use attributes instead */
108 private transient LogType.AttributeInfo attributeInfo2;
109
110 /** @deprecated since 1.1, use attributes instead */
111 private transient LogType.AttributeInfo attributeInfo3;
112
113 private final int sortItem; // index in SORT_ITEM_VALUES
114
115 private final boolean reverseOrder;
116
117 private final Properties valueDescriptions = new Properties();
118
119 private final int maxGlobalReport;
120
121 private final int maxGroupReport;
122
123 /** @deprecated since 1.1, use attributes instead */
124 private final String regexpString;
125
126 /** @deprecated since 1.1, use attributes instead */
127 private transient Pattern regexp;
128
129 /** @deprecated since 1.1, use attributes instead */
130 private final int regexpGroup;
131
132 /** @since 1.1 */
133 protected final String[] attributes;
134
135 protected transient LogType.AttributeInfo[] attributesInfo;
136
137 public FreqPlugin( LogDistiller.Plugin definition )
138 {
139 this( definition, true );
140 }
141
142 protected FreqPlugin( LogDistiller.Plugin definition, boolean checkSampling )
143 {
144 super( definition );
145 attribute = definition.getParam( "attribute" );
146 attribute2 = definition.getParam( "attribute2", null );
147 attribute3 = definition.getParam( "attribute3", null );
148 regexpString = definition.getParam( "regexp" );
149 updatePattern();
150 regexpGroup = Integer.parseInt( definition.getParam( "regexp.group", "-1" ) );
151 String attrs = definition.getParam( "attributes" );
152 attributes = ( attrs == null ) ? null : StringUtils.split( attrs, ", " );
153
154 String attr = definition.getParam( "sortItem" );
155 sortItem = ( attr == null ) ? 0 : Arrays.binarySearch( SORT_ITEM_VALUES, attr );
156 attr = definition.getParam( "reverseOrder", "false" );
157 reverseOrder = Boolean.valueOf( attr ).booleanValue();
158 attr = definition.getParam( "valueDescriptions" );
159 if ( attr != null )
160 {
161 try
162 {
163 valueDescriptions.load( new ByteArrayInputStream( attr.getBytes( "ISO-8859-1" ) ) );
164 }
165 catch ( IOException ioe )
166 {
167 ioe.printStackTrace();
168 }
169 }
170 maxGlobalReport = Integer.parseInt( definition.getParam( "maxGlobalReport", "5" ) );
171 maxGroupReport = Integer.parseInt( definition.getParam( "maxGroupReport", "25" ) );
172
173 if ( checkSampling
174 && ( ( definition.getParam( "sampling.maxCount" ) != null )
175 || ( definition.getParam( "sampling.maxSize" ) != null ) || ( definition.getParam( "sampling.filename" ) != null ) ) )
176 {
177 LogDistiller ld = definition.getGroup().getLogdistiller();
178 ld.addWarning( "sampling features (sampling.* parameters) disabled from freq plugin: "
179 + "moved to sampling plugin in LogDistiller 0.9" );
180 }
181
182 if ( attribute != null )
183 {
184 LogDistiller ld = definition.getGroup().getLogdistiller();
185 ld.addWarning( "'attribute' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use 'attributes'" );
186 if ( regexp != null )
187 {
188 ld.addWarning( "'regexp' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use extended attributes" );
189 }
190 }
191 }
192
193 private void readObject( ObjectInputStream in )
194 throws IOException, ClassNotFoundException
195 {
196 in.defaultReadObject();
197 updatePattern();
198 }
199
200 /** @deprecated since 1.1, use attributes instead */
201 private void updatePattern()
202 {
203 try
204 {
205 regexp = ( regexpString == null ) ? null : Pattern.compile( regexpString );
206 }
207 catch ( PatternSyntaxException pse )
208 {
209 throw new PluginConfigException( "regexp parameter contains bad pattern '" + regexpString
210 + "': caused an exception " + pse.getMessage() );
211 }
212 }
213
214 public void begin( File destinationDirectory )
215 throws FileNotFoundException
216 {
217 }
218
219 public void addLogEvent( LogEvent logEvent )
220 throws IOException
221 {
222 String[] values = extractAttributes( logEvent );
223
224 String key = Freq.asKey( values );
225 Freq freq = (Freq) freqs.get( key );
226 if ( freq == null )
227 {
228 freq = new Freq( key, values );
229 freqs.put( key, freq );
230 }
231
232 addLogEventToFreq( logEvent, freq );
233 }
234
235 private String[] extractAttributes( LogEvent logEvent )
236 {
237 if ( attributes == null )
238 {
239 return new String[]{ oldComputeValue( logEvent ) };
240 }
241
242 // initialize attributesInfo if necessary
243 if ( attributesInfo == null )
244 {
245 attributesInfo = new LogType.AttributeInfo[attributes.length];
246 LogType.Description description = logEvent.getFactory().getDescription();
247
248 for ( int i = 0; i < attributes.length; i++ )
249 {
250 attributesInfo[i] = description.getAttributeInfo( attributes[i] );
251 }
252 }
253
254 String[] values = new String[attributesInfo.length];
255 for ( int i = 0; i < attributesInfo.length; i++ )
256 {
257 values[i] = logEvent.getValue( attributesInfo[i] );
258 }
259 return values;
260 }
261
262 /** @deprecated since 1.1, use attributes instead */
263 private String oldComputeValue( LogEvent logEvent )
264 {
265 // initialize attributeInfo if necessary
266 if ( attributeInfo == null )
267 {
268 LogType.Description description = logEvent.getFactory().getDescription();
269 attributeInfo = description.getAttributeInfo( attribute );
270 if ( StringUtils.isNotEmpty( attribute2 ) )
271 {
272 attributeInfo2 = description.getAttributeInfo( attribute2 );
273 if ( StringUtils.isNotEmpty( attribute3 ) )
274 {
275 attributeInfo3 = description.getAttributeInfo( attribute3 );
276 }
277 }
278 }
279
280 // calculate freq value
281 String value = logEvent.getValue( attributeInfo );
282 if ( attributeInfo2 != null )
283 {
284 value += '-' + logEvent.getValue( attributeInfo2 );
285 if ( attributeInfo3 != null )
286 {
287 value += '-' + logEvent.getValue( attributeInfo3 );
288 }
289 }
290 if ( regexp != null )
291 {
292 Matcher matcher = regexp.matcher( value );
293 if ( matcher.find() && ( regexpGroup <= matcher.groupCount() ) )
294 {
295 if ( regexpGroup >= 0 )
296 {
297 value = matcher.group( regexpGroup );
298 }
299 else if ( matcher.groupCount() > 1 )
300 {
301 StringBuffer buff = new StringBuffer( matcher.group( 1 ) );
302 for ( int i = 2; i <= matcher.groupCount(); i++ )
303 {
304 buff.append( '-' ).append( matcher.group( i ) );
305 }
306 value = buff.toString();
307 }
308 else if ( matcher.groupCount() == 1 )
309 {
310 value = matcher.group( 1 );
311 }
312 else
313 {
314 value = matcher.group( 0 );
315 }
316 }
317 else
318 {
319 value = "";
320 }
321 if ( value == null )
322 {
323 value = "";
324 }
325 }
326
327 return value;
328 }
329
330 /**
331 * @since 1.1
332 */
333 protected void addLogEventToFreq( LogEvent logEvent, Freq freq )
334 throws IOException
335 {
336 // increase freq count
337 freq.count++;
338 int bytes = logEvent.getRawLog().length() + LogDistillation.LINE_SEPARATOR.length();
339 freq.bytes += bytes;
340 }
341
342 public void end()
343 throws IOException
344 {
345 FreqOrder comparator = SORT_ITEM_COMPARATORS[sortItem];
346 comparator = reverseOrder ? new ReverseOrder( comparator ) : comparator;
347 sorted = new TreeSet( comparator );
348 sorted.addAll( freqs.values() );
349 }
350
351 public void appendGroupReport( ReportFormat.PluginReport report )
352 {
353 appendReport( report, maxGroupReport );
354 }
355
356 public void appendGlobalReport( ReportFormat.PluginReport report )
357 {
358 appendReport( report, maxGlobalReport );
359 }
360
361 SortedSet getSorted()
362 {
363 return sorted;
364 }
365
366 private String describeResult( int maxCount )
367 {
368 String count =
369 ( ( maxCount < 0 ) || ( maxCount >= sorted.size() ) ) ? String.valueOf( sorted.size() )
370 : ( maxCount + "/" + sorted.size() );
371 String attrs;
372 if ( attributes == null )
373 {
374 attrs = attribute
375 + ( ( attribute2 == null ) ? "" :
376 ( "'-'" + attribute2 + ( ( attribute3 == null ) ? "" : ( "'-'" + attribute3 ) ) ) );
377 }
378 else
379 {
380 attrs = StringUtils.join( attributes, "'-'" );
381 }
382 return "values of " + ( regexpString == null ? "" : "regexp on " ) + "attribute '" + attrs + "'"
383 + " sorted by" + ( reverseOrder ? " reverse " : " " ) + SORT_ITEM_DESCRIPTIONS[sortItem] + " (" + count
384 + " different values)";
385 }
386
387 private void addItems( ReportFormat.PluginReport report, int maxCount )
388 {
389 Iterator iter = sorted.iterator();
390 while ( iter.hasNext() && ( ( maxCount < 0 ) || ( maxCount-- > 0 ) ) )
391 {
392 Freq freq = (Freq) iter.next();
393
394 // value displayed in the report
395 String value = ( freq.values == null ) ? freq.key : StringUtils.join( freq.values, '-' );
396
397 report.addItem( freq.count, valueDescriptions.getProperty( value, value ) );
398 }
399 }
400
401 private void appendReport( ReportFormat.PluginReport report, int maxCount )
402 {
403 report.beginPluginReport( this, describeResult( maxCount ) );
404 appendLinkToFreqReport( report );
405 addItems( report, maxCount );
406 report.endPluginReport();
407 }
408
409 /**
410 * Append links to freq reports, for freq extensions.
411 *
412 * @param report the report formatter
413 */
414 protected void appendLinkToFreqReport( ReportFormat.PluginReport report )
415 {
416 // no additionnal link for freq: it's intended for subclasses
417 }
418
419 public static class Freq
420 implements Serializable
421 {
422 private static final long serialVersionUID = 9195786567021952968L;
423
424 public final String key;
425
426 public final String[] values;
427
428 public int count = 0;
429
430 public long bytes = 0;
431
432 public Freq( String key, String[] values )
433 {
434 this.key = key;
435 this.values = values;
436
437 // get values Strings as substring from key, to avoid keeping a pointer to the initial log String
438 int index = 0;
439 for ( int i = 0; i < values.length; i++ )
440 {
441 int start = index;
442 index += values[i].length();
443 values[i] = key.substring( start, index );
444 index++;
445 }
446 }
447
448 public static String asKey( String[] values )
449 {
450 return StringUtils.join( values, '\n' );
451 }
452 }
453
454 private abstract static class FreqOrder
455 implements Comparator, Serializable
456 {
457 }
458
459 /**
460 * Order on freq count, then on key when count are equal.
461 */
462 private static class CountOrder
463 extends FreqOrder
464 {
465 private static final long serialVersionUID = -933179294932120994L;
466
467 public int compare( Object o1, Object o2 )
468 {
469 Freq freq1 = (Freq) o1;
470 Freq freq2 = (Freq) o2;
471 int diff = freq2.count - freq1.count;
472 return ( diff == 0 ) ? freq1.key.compareTo( freq2.key ) : diff;
473 }
474 }
475
476 /**
477 * Order on key (lexical order).
478 */
479 private static class ValueOrder
480 extends FreqOrder
481 {
482 private static final long serialVersionUID = 1824757961838977443L;
483
484 public int compare( Object o1, Object o2 )
485 {
486 Freq freq1 = (Freq) o1;
487 Freq freq2 = (Freq) o2;
488 return freq1.key.compareTo( freq2.key );
489 }
490 }
491
492 /**
493 * Order on key length, then key lexical order.
494 * example: 1234 > 234 > 233 > A5 > 45
495 */
496 private static class ValueLengthOrder
497 extends FreqOrder
498 {
499 private static final long serialVersionUID = 651766724239811116L;
500
501 public int compare( Object o1, Object o2 )
502 {
503 Freq freq1 = (Freq) o1;
504 Freq freq2 = (Freq) o2;
505 int diff = freq2.key.length() - freq1.key.length();
506 return ( diff == 0 ) ? freq2.key.compareTo( freq1.key ) : diff;
507 }
508 }
509
510 private static class ReverseOrder
511 extends FreqOrder
512 {
513 private static final long serialVersionUID = 539304109704333616L;
514
515 private final FreqOrder comparator;
516
517 public ReverseOrder( FreqOrder comparator )
518 {
519 this.comparator = comparator;
520 }
521
522 public int compare( Object o1, Object o2 )
523 {
524 return comparator.compare( o2, o1 );
525 }
526 }
527
528 private static class Type
529 extends Plugin
530 {
531 public Type()
532 {
533 super( ID );
534 }
535
536 public LogDistillation.Plugin newInstance( LogDistiller.Plugin conf )
537 {
538 return new FreqPlugin( conf );
539 }
540 }
541 }