View Javadoc

1   package net.sf.logdistiller.plugins;
2   
3   /*
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  import java.io.*;
18  import java.util.*;
19  import java.util.regex.Matcher;
20  import java.util.regex.Pattern;
21  import java.util.regex.PatternSyntaxException;
22  
23  import org.apache.commons.lang.StringUtils;
24  
25  import net.sf.logdistiller.LogDistillation;
26  import net.sf.logdistiller.LogDistiller;
27  import net.sf.logdistiller.LogEvent;
28  import net.sf.logdistiller.LogType;
29  import net.sf.logdistiller.Plugin;
30  import net.sf.logdistiller.PluginConfigException;
31  import net.sf.logdistiller.ReportFormat;
32  
33  /**
34   * Counts the frequency of each attribute's value.
35   * <p>
36   * Parameters:
37   * </p>
38   * <ul>
39   * <li><b>value</b> (deprecated since 1.1): definition of the value that will be taken into account
40   * <ul>
41   * <li><code>attribute</code> (mandatory): the attribute's name
42   * <li><code>regexp</code>: use a regexp to extract value taken in account for the first attribute
43   * <li><code>regexp.group</code> (default: -1): which group to extract? If negative, all groups will be concatenated
44   * <li><code>attribute2</code> (default: none): second attribute to append
45   * <li><code>attribute3</code> (default: none): third attribute to append
46   * </ul>
47   * </li>
48   * <li><b>value</b>: definition of the value that will be taken into account
49   * <ul>
50   * <li><code>attributes</code> (mandatory): the attributes' names, comma separated (no count limit)
51   * </li>
52   * </ul>
53   * <li><b>sorting options</b>
54   * <ul>
55   * <li><code>sortItem</code> (default: <code>count</code>): sort on which item? <code>count</code>, <code>value</code>,
56   * <code>valueLength</code>
57   * <li><code>reverseOrder</code> (default: <code>false</code>): by default, most frequent value are displayed first
58   * </ul>
59   * </li>
60   * <li><b>reporting</b>
61   * <ul>
62   * <li><code>valueDescriptions</code>: description to insert in the report instead of corresponding value (in java
63   * properties format)
64   * <li><code>maxGlobalReport</code> (default: 5): maximum number of values inserted in global report
65   * <li><code>maxGroupReport</code> (default: 25): maximum number of values inserted in group report
66   * </ul>
67   * </li>
68   * </ul>
69   *
70   * @see SamplingPlugin
71   * @since 0.6
72   */
73  public class FreqPlugin
74      extends LogDistillation.Plugin
75  {
76      private static final long serialVersionUID = -5162563816094662936L;
77  
78      public final static String ID = "freq";
79  
80      public final static Plugin TYPE = new Type();
81  
82      private final static String[] SORT_ITEM_VALUES = { "count", "value", "valueLength" };
83  
84      private final static String[] SORT_ITEM_DESCRIPTIONS = { "count", "attribute's value", "attribute's value length" };
85  
86      private final static FreqOrder[] SORT_ITEM_COMPARATORS =
87          { new CountOrder(), new ValueOrder(), new ValueLengthOrder() };
88  
89      private final Map freqs = new HashMap(); // key = attribute's value (String), value = Freq instance
90  
91      protected SortedSet sorted; // sorted Freq
92  
93      /** @deprecated since 1.1, use attributes instead */
94      private final String attribute;
95  
96      /** @since 0.8
97       * @deprecated since 1.1, use attributes instead */
98      private final String attribute2;
99  
100     /** @since 0.8
101      * @deprecated since 1.1, use attributes instead */
102     private final String attribute3;
103 
104     /** @deprecated since 1.1, use attributes instead */
105     private transient LogType.AttributeInfo attributeInfo;
106 
107     /** @deprecated since 1.1, use attributes instead */
108     private transient LogType.AttributeInfo attributeInfo2;
109 
110     /** @deprecated since 1.1, use attributes instead */
111     private transient LogType.AttributeInfo attributeInfo3;
112 
113     private final int sortItem; // index in SORT_ITEM_VALUES
114 
115     private final boolean reverseOrder;
116 
117     private final Properties valueDescriptions = new Properties();
118 
119     private final int maxGlobalReport;
120 
121     private final int maxGroupReport;
122 
123     /** @deprecated since 1.1, use attributes instead */
124     private final String regexpString;
125 
126     /** @deprecated since 1.1, use attributes instead */
127     private transient Pattern regexp;
128 
129     /** @deprecated since 1.1, use attributes instead */
130     private final int regexpGroup;
131 
132     /** @since 1.1 */
133     protected final String[] attributes;
134 
135     protected transient LogType.AttributeInfo[] attributesInfo;
136 
137     public FreqPlugin( LogDistiller.Plugin definition )
138     {
139         this( definition, true );
140     }
141 
142     protected FreqPlugin( LogDistiller.Plugin definition, boolean checkSampling )
143     {
144         super( definition );
145         attribute = definition.getParam( "attribute" );
146         attribute2 = definition.getParam( "attribute2", null );
147         attribute3 = definition.getParam( "attribute3", null );
148         regexpString = definition.getParam( "regexp" );
149         updatePattern();
150         regexpGroup = Integer.parseInt( definition.getParam( "regexp.group", "-1" ) );
151         String attrs = definition.getParam( "attributes" );
152         attributes = ( attrs == null ) ? null : StringUtils.split( attrs, ", " );
153 
154         String attr = definition.getParam( "sortItem" );
155         sortItem = ( attr == null ) ? 0 : Arrays.binarySearch( SORT_ITEM_VALUES, attr );
156         attr = definition.getParam( "reverseOrder", "false" );
157         reverseOrder = Boolean.valueOf( attr ).booleanValue();
158         attr = definition.getParam( "valueDescriptions" );
159         if ( attr != null )
160         {
161             try
162             {
163                 valueDescriptions.load( new ByteArrayInputStream( attr.getBytes( "ISO-8859-1" ) ) );
164             }
165             catch ( IOException ioe )
166             {
167                 ioe.printStackTrace();
168             }
169         }
170         maxGlobalReport = Integer.parseInt( definition.getParam( "maxGlobalReport", "5" ) );
171         maxGroupReport = Integer.parseInt( definition.getParam( "maxGroupReport", "25" ) );
172 
173         if ( checkSampling
174             && ( ( definition.getParam( "sampling.maxCount" ) != null )
175                 || ( definition.getParam( "sampling.maxSize" ) != null ) || ( definition.getParam( "sampling.filename" ) != null ) ) )
176         {
177             LogDistiller ld = definition.getGroup().getLogdistiller();
178             ld.addWarning( "sampling features (sampling.* parameters) disabled from freq plugin: "
179                            + "moved to sampling plugin in LogDistiller 0.9" );
180         }
181 
182         if ( attribute != null )
183         {
184             LogDistiller ld = definition.getGroup().getLogdistiller();
185             ld.addWarning( "'attribute' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use 'attributes'" );
186             if ( regexp != null )
187             {
188                 ld.addWarning( "'regexp' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use extended attributes" );
189             }
190         }
191     }
192 
193     private void readObject( ObjectInputStream in )
194         throws IOException, ClassNotFoundException
195     {
196         in.defaultReadObject();
197         updatePattern();
198     }
199 
200     /** @deprecated since 1.1, use attributes instead */
201     private void updatePattern()
202     {
203         try
204         {
205             regexp = ( regexpString == null ) ? null : Pattern.compile( regexpString );
206         }
207         catch ( PatternSyntaxException pse )
208         {
209             throw new PluginConfigException( "regexp parameter contains bad pattern '" + regexpString
210                 + "': caused an exception " + pse.getMessage() );
211         }
212     }
213 
214     public void begin( File destinationDirectory )
215         throws FileNotFoundException
216     {
217     }
218 
219     public void addLogEvent( LogEvent logEvent )
220         throws IOException
221     {
222         String[] values = extractAttributes( logEvent );
223 
224         String key = Freq.asKey( values );
225         Freq freq = (Freq) freqs.get( key );
226         if ( freq == null )
227         {
228             freq = new Freq( key, values );
229             freqs.put( key, freq );
230         }
231 
232         addLogEventToFreq( logEvent, freq );
233     }
234 
235     private String[] extractAttributes( LogEvent logEvent )
236     {
237         if ( attributes == null )
238         {
239             return new String[]{ oldComputeValue( logEvent ) };
240         }
241 
242         // initialize attributesInfo if necessary
243         if ( attributesInfo == null )
244         {
245             attributesInfo = new LogType.AttributeInfo[attributes.length];
246             LogType.Description description = logEvent.getFactory().getDescription();
247 
248             for ( int i = 0; i < attributes.length; i++ )
249             {
250                 attributesInfo[i] = description.getAttributeInfo( attributes[i] );
251             }
252         }
253 
254         String[] values = new String[attributesInfo.length];
255         for ( int i = 0; i < attributesInfo.length; i++ )
256         {
257             values[i] = logEvent.getValue( attributesInfo[i] );
258         }
259         return values;
260     }
261 
262     /** @deprecated since 1.1, use attributes instead */
263     private String oldComputeValue( LogEvent logEvent )
264     {
265         // initialize attributeInfo if necessary
266         if ( attributeInfo == null )
267         {
268             LogType.Description description = logEvent.getFactory().getDescription();
269             attributeInfo = description.getAttributeInfo( attribute );
270             if ( StringUtils.isNotEmpty( attribute2 ) )
271             {
272                 attributeInfo2 = description.getAttributeInfo( attribute2 );
273                 if ( StringUtils.isNotEmpty( attribute3 ) )
274                 {
275                     attributeInfo3 = description.getAttributeInfo( attribute3 );
276                 }
277             }
278         }
279 
280         // calculate freq value
281         String value = logEvent.getValue( attributeInfo );
282         if ( attributeInfo2 != null )
283         {
284             value += '-' + logEvent.getValue( attributeInfo2 );
285             if ( attributeInfo3 != null )
286             {
287                 value += '-' + logEvent.getValue( attributeInfo3 );
288             }
289         }
290         if ( regexp != null )
291         {
292             Matcher matcher = regexp.matcher( value );
293             if ( matcher.find() && ( regexpGroup <= matcher.groupCount() ) )
294             {
295                 if ( regexpGroup >= 0 )
296                 {
297                     value = matcher.group( regexpGroup );
298                 }
299                 else if ( matcher.groupCount() > 1 )
300                 {
301                     StringBuffer buff = new StringBuffer( matcher.group( 1 ) );
302                     for ( int i = 2; i <= matcher.groupCount(); i++ )
303                     {
304                         buff.append( '-' ).append( matcher.group( i ) );
305                     }
306                     value = buff.toString();
307                 }
308                 else if ( matcher.groupCount() == 1 )
309                 {
310                     value = matcher.group( 1 );
311                 }
312                 else
313                 {
314                     value = matcher.group( 0 );
315                 }
316             }
317             else
318             {
319                 value = "";
320             }
321             if ( value == null )
322             {
323                 value = "";
324             }
325         }
326 
327         return value;
328     }
329 
330     /**
331      * @since 1.1
332      */
333     protected void addLogEventToFreq( LogEvent logEvent, Freq freq )
334         throws IOException
335     {
336         // increase freq count
337         freq.count++;
338         int bytes = logEvent.getRawLog().length() + LogDistillation.LINE_SEPARATOR.length();
339         freq.bytes += bytes;
340     }
341 
342     public void end()
343         throws IOException
344     {
345         FreqOrder comparator = SORT_ITEM_COMPARATORS[sortItem];
346         comparator = reverseOrder ? new ReverseOrder( comparator ) : comparator;
347         sorted = new TreeSet( comparator );
348         sorted.addAll( freqs.values() );
349     }
350 
351     public void appendGroupReport( ReportFormat.PluginReport report )
352     {
353         appendReport( report, maxGroupReport );
354     }
355 
356     public void appendGlobalReport( ReportFormat.PluginReport report )
357     {
358         appendReport( report, maxGlobalReport );
359     }
360 
361     SortedSet getSorted()
362     {
363         return sorted;
364     }
365 
366     private String describeResult( int maxCount )
367     {
368         String count =
369             ( ( maxCount < 0 ) || ( maxCount >= sorted.size() ) ) ? String.valueOf( sorted.size() )
370                             : ( maxCount + "/" + sorted.size() );
371         String attrs;
372         if ( attributes == null )
373         {
374             attrs = attribute
375                 + ( ( attribute2 == null ) ? "" :
376                     ( "'-'" + attribute2 + ( ( attribute3 == null ) ? "" : ( "'-'" + attribute3 ) ) ) );
377         }
378         else
379         {
380             attrs = StringUtils.join( attributes, "'-'" );
381         }
382         return "values of " + ( regexpString == null ? "" : "regexp on " ) + "attribute '" + attrs + "'"
383             + " sorted by" + ( reverseOrder ? " reverse " : " " ) + SORT_ITEM_DESCRIPTIONS[sortItem] + " (" + count
384             + " different values)";
385     }
386 
387     private void addItems( ReportFormat.PluginReport report, int maxCount )
388     {
389         Iterator iter = sorted.iterator();
390         while ( iter.hasNext() && ( ( maxCount < 0 ) || ( maxCount-- > 0 ) ) )
391         {
392             Freq freq = (Freq) iter.next();
393 
394             // value displayed in the report
395             String value = ( freq.values == null ) ? freq.key : StringUtils.join( freq.values, '-' );
396 
397             report.addItem( freq.count, valueDescriptions.getProperty( value, value ) );
398         }
399     }
400 
401     private void appendReport( ReportFormat.PluginReport report, int maxCount )
402     {
403         report.beginPluginReport( this, describeResult( maxCount ) );
404         appendLinkToFreqReport( report );
405         addItems( report, maxCount );
406         report.endPluginReport();
407     }
408 
409     /**
410      * Append links to freq reports, for freq extensions.
411      *
412      * @param report the report formatter
413      */
414     protected void appendLinkToFreqReport( ReportFormat.PluginReport report )
415     {
416         // no additionnal link for freq: it's intended for subclasses
417     }
418 
419     public static class Freq
420         implements Serializable
421     {
422         private static final long serialVersionUID = 9195786567021952968L;
423 
424         public final String key;
425 
426         public final String[] values;
427 
428         public int count = 0;
429 
430         public long bytes = 0;
431 
432         public Freq( String key, String[] values )
433         {
434             this.key = key;
435             this.values = values;
436 
437             // get values Strings as substring from key, to avoid keeping a pointer to the initial log String
438             int index = 0;
439             for ( int i = 0; i < values.length; i++ )
440             {
441                 int start = index;
442                 index += values[i].length();
443                 values[i] = key.substring( start, index );
444                 index++;
445             }
446         }
447 
448         public static String asKey( String[] values )
449         {
450             return StringUtils.join( values, '\n' );
451         }
452     }
453 
454     private abstract static class FreqOrder
455         implements Comparator, Serializable
456     {
457     }
458 
459     /**
460      * Order on freq count, then on key when count are equal.
461      */
462     private static class CountOrder
463         extends FreqOrder
464     {
465         private static final long serialVersionUID = -933179294932120994L;
466 
467         public int compare( Object o1, Object o2 )
468         {
469             Freq freq1 = (Freq) o1;
470             Freq freq2 = (Freq) o2;
471             int diff = freq2.count - freq1.count;
472             return ( diff == 0 ) ? freq1.key.compareTo( freq2.key ) : diff;
473         }
474     }
475 
476     /**
477      * Order on key (lexical order).
478      */
479     private static class ValueOrder
480         extends FreqOrder
481     {
482         private static final long serialVersionUID = 1824757961838977443L;
483 
484         public int compare( Object o1, Object o2 )
485         {
486             Freq freq1 = (Freq) o1;
487             Freq freq2 = (Freq) o2;
488             return freq1.key.compareTo( freq2.key );
489         }
490     }
491 
492     /**
493      * Order on key length, then key lexical order.
494      * example: 1234 &gt; 234 &gt; 233 &gt; A5 &gt; 45
495      */
496     private static class ValueLengthOrder
497         extends FreqOrder
498     {
499         private static final long serialVersionUID = 651766724239811116L;
500 
501         public int compare( Object o1, Object o2 )
502         {
503             Freq freq1 = (Freq) o1;
504             Freq freq2 = (Freq) o2;
505             int diff = freq2.key.length() - freq1.key.length();
506             return ( diff == 0 ) ? freq2.key.compareTo( freq1.key ) : diff;
507         }
508     }
509 
510     private static class ReverseOrder
511         extends FreqOrder
512     {
513         private static final long serialVersionUID = 539304109704333616L;
514 
515         private final FreqOrder comparator;
516 
517         public ReverseOrder( FreqOrder comparator )
518         {
519             this.comparator = comparator;
520         }
521 
522         public int compare( Object o1, Object o2 )
523         {
524             return comparator.compare( o2, o1 );
525         }
526     }
527 
528     private static class Type
529         extends Plugin
530     {
531         public Type()
532         {
533             super( ID );
534         }
535 
536         public LogDistillation.Plugin newInstance( LogDistiller.Plugin conf )
537         {
538             return new FreqPlugin( conf );
539         }
540     }
541 }