View Javadoc
1   package net.sf.logdistiller.plugins;
2   
3   /*
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  import java.io.*;
18  import java.util.*;
19  import java.util.regex.Matcher;
20  import java.util.regex.Pattern;
21  import java.util.regex.PatternSyntaxException;
22  
23  import org.apache.commons.lang.StringUtils;
24  
25  import net.sf.logdistiller.LogDistillation;
26  import net.sf.logdistiller.LogDistiller;
27  import net.sf.logdistiller.LogEvent;
28  import net.sf.logdistiller.LogType;
29  import net.sf.logdistiller.Plugin;
30  import net.sf.logdistiller.PluginConfigException;
31  import net.sf.logdistiller.ReportFormat;
32  
33  /**
34   * Counts the frequency of each attribute's value.
35   * <p>
36   * Parameters:
37   * </p>
38   * <ul>
39   * <li><b>value</b> (deprecated since 1.1): definition of the value that will be taken into account
40   * <ul>
41   * <li><code>attribute</code> (mandatory): the attribute's name
42   * <li><code>regexp</code>: use a regexp to extract the value taken into account for the first attribute
43   * <li><code>regexp.group</code> (default: -1): which group to extract? If negative, all groups will be concatenated
44   * <li><code>attribute2</code> (default: none): second attribute to append
45   * <li><code>attribute3</code> (default: none): third attribute to append
46   * </ul>
47   * </li>
48   * <li><b>value</b>: definition of the value that will be taken into account
49   * <ul>
50   * <li><code>attributes</code> (mandatory): the attributes' names, comma separated (no count limit)
51   * </li>
52   * </ul>
53   * <li><b>sorting options</b>
54   * <ul>
55   * <li><code>sortItem</code> (default: <code>count</code>): sort on which item? <code>count</code>, <code>value</code>,
56   * <code>valueLength</code>
57   * <li><code>reverseOrder</code> (default: <code>false</code>): by default, most frequent values are displayed first
58   * </ul>
59   * </li>
60   * <li><b>reporting</b>
61   * <ul>
62   * <li><code>valueDescriptions</code>: description to insert in the report instead of corresponding value (in java
63   * properties format)
64   * <li><code>maxGlobalReport</code> (default: 5): maximum number of values inserted in global report
65   * <li><code>maxGroupReport</code> (default: 25): maximum number of values inserted in group report
66   * </ul>
67   * </li>
68   * </ul>
69   *
70   * @see SamplingPlugin
71   * @since 0.6
72   */
73  public class FreqPlugin
74      extends LogDistillation.Plugin
75  {
76      private static final long serialVersionUID = -5162563816094662936L;
77  
78      public final static String ID = "freq";
79  
80      public final static Plugin TYPE = new Type();
81  
82      private final static String[] SORT_ITEM_VALUES = { "count", "value", "valueLength" };
83  
84      private final static String[] SORT_ITEM_DESCRIPTIONS = { "count", "attribute's value", "attribute's value length" };
85  
86      private final static FreqOrder[] SORT_ITEM_COMPARATORS =
87          { new CountOrder(), new ValueOrder(), new ValueLengthOrder() };
88  
89      private final Map<String, Freq> freqs = new HashMap<String, Freq>();
90  
91      protected SortedSet<Freq> sorted; // sorted Freq
92  
93      /** @deprecated since 1.1, use attributes instead */
94      private final String attribute;
95  
96      /** @since 0.8
97       * @deprecated since 1.1, use attributes instead */
98      private final String attribute2;
99  
100     /** @since 0.8
101      * @deprecated since 1.1, use attributes instead */
102     private final String attribute3;
103 
104     /** @deprecated since 1.1, use attributes instead */
105     private transient LogType.AttributeInfo attributeInfo;
106 
107     /** @deprecated since 1.1, use attributes instead */
108     private transient LogType.AttributeInfo attributeInfo2;
109 
110     /** @deprecated since 1.1, use attributes instead */
111     private transient LogType.AttributeInfo attributeInfo3;
112 
113     private final int sortItem; // index in SORT_ITEM_VALUES
114 
115     private final boolean reverseOrder;
116 
117     private final Properties valueDescriptions = new Properties();
118 
119     private final int maxGlobalReport;
120 
121     private final int maxGroupReport;
122 
123     /** @deprecated since 1.1, use attributes instead */
124     private final String regexpString;
125 
126     /** @deprecated since 1.1, use attributes instead */
127     private transient Pattern regexp;
128 
129     /** @deprecated since 1.1, use attributes instead */
130     private final int regexpGroup;
131 
132     /** @since 1.1 */
133     protected final String[] attributes;
134 
135     protected transient LogType.AttributeInfo[] attributesInfo;
136 
137     public FreqPlugin( LogDistiller.Plugin definition )
138     {
139         this( definition, true );
140     }
141 
        /**
         * Creates a freq plugin by reading every supported parameter from the plugin definition.
         * Unsupported or deprecated settings are reported as warnings on the LogDistiller configuration.
         *
         * @param definition the plugin definition holding the parameters
         * @param checkSampling when <code>true</code>, a warning is added if any of the
         *            <code>sampling.maxCount</code>, <code>sampling.maxSize</code> or
         *            <code>sampling.filename</code> parameters is set
         * @throws PluginConfigException if the <code>regexp</code> parameter is not a valid pattern
         * @throws NumberFormatException if <code>regexp.group</code>, <code>maxGlobalReport</code> or
         *             <code>maxGroupReport</code> is not an integer
         */
142     protected FreqPlugin( LogDistiller.Plugin definition, boolean checkSampling )
143     {
144         super( definition );
145         LogDistiller.Group group = definition.getGroup();
            // location prefix used in every warning message
146         String context = "(logdistiller/group[id='" + ( ( group == null ) ? "default-id" : group.getId() )
147             + "]/plugin[type='" + definition.getType() + "'])";
148 
            // deprecated (pre-1.1) definition of the value
149         attribute = definition.getParam( "attribute" );
150         attribute2 = definition.getParam( "attribute2", null );
151         attribute3 = definition.getParam( "attribute3", null );
152         regexpString = definition.getParam( "regexp" );
            // must run after regexpString is assigned: compiles it into regexp
153         updatePattern();
154         regexpGroup = Integer.parseInt( definition.getParam( "regexp.group", "-1" ) );
            // current definition of the value: attribute names separated by commas and/or spaces
155         String attrs = definition.getParam( "attributes" );
156         attributes = ( attrs == null ) ? null : StringUtils.split( attrs, ", " );
157 
            // binarySearch relies on SORT_ITEM_VALUES being in ascending order;
            // a negative result means the value is not supported
158         String attr = definition.getParam( "sortItem" );
159         int sortItem_ = ( attr == null ) ? 0 : Arrays.binarySearch( SORT_ITEM_VALUES, attr );
160         if ( sortItem_ < 0 )
161         {
                // NOTE(review): group is treated as possibly null when building context above, but is
                // dereferenced without a check here and in the warnings below - confirm it cannot be null
162             LogDistiller ld = group.getLogdistiller();
163             ld.addWarning( context + " unsupported '" + attr
164                 + "' sortItem value: choose one of [ 'count', 'value', 'valueLength' ], defaulting to 'count'" );
165             sortItem_ = 0;
166         }
167         sortItem = sortItem_;
168 
169         attr = definition.getParam( "reverseOrder", "false" );
170         reverseOrder = Boolean.valueOf( attr ).booleanValue();
            // value descriptions are given in java properties format
171         attr = definition.getParam( "valueDescriptions" );
172         if ( attr != null )
173         {
174             try
175             {
176                 valueDescriptions.load( new ByteArrayInputStream( attr.getBytes( "ISO-8859-1" ) ) );
177             }
178             catch ( IOException ioe )
179             {
                    // presumably not reachable with an in-memory stream; the failure is only printed
180                 ioe.printStackTrace();
181             }
182         }
183         maxGlobalReport = Integer.parseInt( definition.getParam( "maxGlobalReport", "5" ) );
184         maxGroupReport = Integer.parseInt( definition.getParam( "maxGroupReport", "25" ) );
185 
            // sampling.* parameters are no longer handled by this plugin: only warn about them
186         if ( checkSampling
187             && ( ( definition.getParam( "sampling.maxCount" ) != null )
188                 || ( definition.getParam( "sampling.maxSize" ) != null )
189                 || ( definition.getParam( "sampling.filename" ) != null ) ) )
190         {
191             LogDistiller ld = group.getLogdistiller();
192             ld.addWarning( context + " sampling features (sampling.* parameters) disabled from freq plugin: "
193                            + "moved to sampling plugin since LogDistiller 0.9" );
194         }
195 
            // deprecation warnings for the pre-1.1 parameters
196         if ( attribute != null )
197         {
198             LogDistiller ld = group.getLogdistiller();
199             ld.addWarning( context + " 'attribute' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use 'attributes'" );
200             if ( regexp != null )
201             {
202                 ld.addWarning( context + " 'regexp' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use extended attributes" );
203             }
204         }
205     }
206 
207     private void readObject( ObjectInputStream in )
208         throws IOException, ClassNotFoundException
209     {
210         in.defaultReadObject();
211         updatePattern();
212     }
213 
214     /** @deprecated since 1.1, use attributes instead */
215     private void updatePattern()
216     {
217         try
218         {
219             regexp = ( regexpString == null ) ? null : Pattern.compile( regexpString );
220         }
221         catch ( PatternSyntaxException pse )
222         {
223             throw new PluginConfigException( "regexp parameter contains bad pattern '" + regexpString
224                 + "': caused an exception " + pse.getMessage() );
225         }
226     }
227 
228     public void begin( File destinationDirectory )
229         throws FileNotFoundException
230     {
231     }
232 
233     public void addLogEvent( LogEvent logEvent )
234         throws IOException
235     {
236         String[] values = extractAttributes( logEvent );
237 
238         String key = Freq.asKey( values );
239         Freq freq = (Freq) freqs.get( key );
240         if ( freq == null )
241         {
242             freq = new Freq( key, values );
243             freqs.put( key, freq );
244         }
245 
246         addLogEventToFreq( logEvent, freq );
247     }
248 
249     private String[] extractAttributes( LogEvent logEvent )
250     {
251         if ( attributes == null )
252         {
253             return new String[]{ oldComputeValue( logEvent ) };
254         }
255 
256         // initialize attributesInfo if necessary
257         if ( attributesInfo == null )
258         {
259             attributesInfo = new LogType.AttributeInfo[attributes.length];
260             LogType.Description description = logEvent.getFactory().getDescription();
261 
262             for ( int i = 0; i < attributes.length; i++ )
263             {
264                 attributesInfo[i] = description.getAttributeInfo( attributes[i] );
265             }
266         }
267 
268         String[] values = new String[attributesInfo.length];
269         for ( int i = 0; i < attributesInfo.length; i++ )
270         {
271             values[i] = logEvent.getValue( attributesInfo[i] );
272         }
273         return values;
274     }
275 
276     /** @deprecated since 1.1, use attributes instead */
277     private String oldComputeValue( LogEvent logEvent )
278     {
279         // initialize attributeInfo if necessary
280         if ( attributeInfo == null )
281         {
282             LogType.Description description = logEvent.getFactory().getDescription();
283             attributeInfo = description.getAttributeInfo( attribute );
284             if ( StringUtils.isNotEmpty( attribute2 ) )
285             {
286                 attributeInfo2 = description.getAttributeInfo( attribute2 );
287                 if ( StringUtils.isNotEmpty( attribute3 ) )
288                 {
289                     attributeInfo3 = description.getAttributeInfo( attribute3 );
290                 }
291             }
292         }
293 
294         // calculate freq value
295         String value = logEvent.getValue( attributeInfo );
296         if ( attributeInfo2 != null )
297         {
298             value += '-' + logEvent.getValue( attributeInfo2 );
299             if ( attributeInfo3 != null )
300             {
301                 value += '-' + logEvent.getValue( attributeInfo3 );
302             }
303         }
304         if ( regexp != null )
305         {
306             Matcher matcher = regexp.matcher( value );
307             if ( matcher.find() && ( regexpGroup <= matcher.groupCount() ) )
308             {
309                 if ( regexpGroup >= 0 )
310                 {
311                     value = matcher.group( regexpGroup );
312                 }
313                 else if ( matcher.groupCount() > 1 )
314                 {
315                     StringBuffer buff = new StringBuffer( matcher.group( 1 ) );
316                     for ( int i = 2; i <= matcher.groupCount(); i++ )
317                     {
318                         buff.append( '-' ).append( matcher.group( i ) );
319                     }
320                     value = buff.toString();
321                 }
322                 else if ( matcher.groupCount() == 1 )
323                 {
324                     value = matcher.group( 1 );
325                 }
326                 else
327                 {
328                     value = matcher.group( 0 );
329                 }
330             }
331             else
332             {
333                 value = "";
334             }
335             if ( value == null )
336             {
337                 value = "";
338             }
339         }
340 
341         return value;
342     }
343 
344     /**
345      * @since 1.1
346      */
347     protected void addLogEventToFreq( LogEvent logEvent, Freq freq )
348         throws IOException
349     {
350         // increase freq count
351         freq.count++;
352         int bytes = logEvent.getRawLog().length() + LogDistillation.LINE_SEPARATOR.length();
353         freq.bytes += bytes;
354     }
355 
356     public void end()
357         throws IOException
358     {
359         FreqOrder comparator = SORT_ITEM_COMPARATORS[sortItem];
360         comparator = reverseOrder ? new ReverseOrder( comparator ) : comparator;
361         sorted = new TreeSet<Freq>( comparator );
362         sorted.addAll( freqs.values() );
363     }
364 
365     public void appendGroupReport( ReportFormat.PluginReport report )
366     {
367         appendReport( report, maxGroupReport );
368     }
369 
370     public void appendGlobalReport( ReportFormat.PluginReport report )
371     {
372         appendReport( report, maxGlobalReport );
373     }
374 
375     SortedSet<Freq> getSorted()
376     {
377         return sorted;
378     }
379 
380     private String describeResult( int maxCount )
381     {
382         String count =
383             ( ( maxCount < 0 ) || ( maxCount >= sorted.size() ) ) ? String.valueOf( sorted.size() )
384                             : ( maxCount + "/" + sorted.size() );
385         String attrs;
386         if ( attributes == null )
387         {
388             attrs = attribute
389                 + ( ( attribute2 == null ) ? "" :
390                     ( "'-'" + attribute2 + ( ( attribute3 == null ) ? "" : ( "'-'" + attribute3 ) ) ) );
391         }
392         else
393         {
394             attrs = StringUtils.join( attributes, "'-'" );
395         }
396         return "values of " + ( regexpString == null ? "" : "regexp on " ) + "attribute '" + attrs + "'"
397             + " sorted by" + ( reverseOrder ? " reverse " : " " ) + SORT_ITEM_DESCRIPTIONS[sortItem] + " (" + count
398             + " different values)";
399     }
400 
401     private void addItems( ReportFormat.PluginReport report, int maxCount )
402     {
403         Iterator<Freq> iter = sorted.iterator();
404         while ( iter.hasNext() && ( ( maxCount < 0 ) || ( maxCount-- > 0 ) ) )
405         {
406             Freq freq = iter.next();
407 
408             // value displayed in the report
409             String value = ( freq.values == null ) ? freq.key : StringUtils.join( freq.values, '-' );
410 
411             report.addItem( freq.count, valueDescriptions.getProperty( value, value ) );
412         }
413     }
414 
415     private void appendReport( ReportFormat.PluginReport report, int maxCount )
416     {
417         report.beginPluginReport( this, describeResult( maxCount ) );
418         appendLinkToFreqReport( report );
419         addItems( report, maxCount );
420         report.endPluginReport();
421     }
422 
423     /**
424      * Append links to freq reports, for freq extensions.
425      *
426      * @param report the report formatter
427      */
428     protected void appendLinkToFreqReport( ReportFormat.PluginReport report )
429     {
430         // no additionnal link for freq: it's intended for subclasses
431     }
432 
433     public static class Freq
434         implements Serializable
435     {
436         private static final long serialVersionUID = 9195786567021952968L;
437 
438         public final String key;
439 
440         public final String[] values;
441 
442         public int count = 0;
443 
444         public long bytes = 0;
445 
446         public Freq( String key, String[] values )
447         {
448             this.key = key;
449             this.values = values;
450 
451             // get values Strings as substring from key, to avoid keeping a pointer to the initial log String
452             int index = 0;
453             for ( int i = 0; i < values.length; i++ )
454             {
455                 int start = index;
456                 index += values[i].length();
457                 values[i] = key.substring( start, index );
458                 index++;
459             }
460         }
461 
462         public static String asKey( String[] values )
463         {
464             return StringUtils.join( values, '\n' );
465         }
466     }
467 
468     private abstract static class FreqOrder
469         implements Comparator<Freq>, Serializable
470     {
471         private static final long serialVersionUID = -2574387085671510158L;
472     }
473 
474     /**
475      * Order on freq count, then on key when count are equal.
476      */
477     private static class CountOrder
478         extends FreqOrder
479     {
480         private static final long serialVersionUID = -933179294932120994L;
481 
482         public int compare( Freq freq1, Freq freq2 )
483         {
484             int diff = freq2.count - freq1.count;
485             return ( diff == 0 ) ? freq1.key.compareTo( freq2.key ) : diff;
486         }
487     }
488 
489     /**
490      * Order on key (lexical order).
491      */
492     private static class ValueOrder
493         extends FreqOrder
494     {
495         private static final long serialVersionUID = 1824757961838977443L;
496 
497         public int compare( Freq freq1, Freq freq2 )
498         {
499             return freq1.key.compareTo( freq2.key );
500         }
501     }
502 
503     /**
504      * Order on key length, then key lexical order.
505      * example: 1234 &gt; 234 &gt; 233 &gt; A5 &gt; 45
506      */
507     private static class ValueLengthOrder
508         extends FreqOrder
509     {
510         private static final long serialVersionUID = 651766724239811116L;
511 
512         public int compare( Freq freq1, Freq freq2 )
513         {
514             int diff = freq2.key.length() - freq1.key.length();
515             return ( diff == 0 ) ? freq2.key.compareTo( freq1.key ) : diff;
516         }
517     }
518 
519     private static class ReverseOrder
520         extends FreqOrder
521     {
522         private static final long serialVersionUID = 539304109704333616L;
523 
524         private final FreqOrder comparator;
525 
526         public ReverseOrder( FreqOrder comparator )
527         {
528             this.comparator = comparator;
529         }
530 
531         public int compare( Freq freq1, Freq freq2 )
532         {
533             return comparator.compare( freq2, freq1 );
534         }
535     }
536 
537     private static class Type
538         extends Plugin
539     {
540         public Type()
541         {
542             super( ID );
543         }
544 
545         public LogDistillation.Plugin newInstance( LogDistiller.Plugin conf )
546         {
547             return new FreqPlugin( conf );
548         }
549     }
550 }