001package net.sf.logdistiller.plugins;
002
003/*
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017import java.io.*;
018import java.util.*;
019import java.util.regex.Matcher;
020import java.util.regex.Pattern;
021import java.util.regex.PatternSyntaxException;
022
023import org.apache.commons.lang.StringUtils;
024
025import net.sf.logdistiller.LogDistillation;
026import net.sf.logdistiller.LogDistiller;
027import net.sf.logdistiller.LogEvent;
028import net.sf.logdistiller.LogType;
029import net.sf.logdistiller.Plugin;
030import net.sf.logdistiller.PluginConfigException;
031import net.sf.logdistiller.ReportFormat;
032
033/**
034 * Counts the frequency of each attribute's value.
035 * <p>
036 * Parameters:
037 * </p>
038 * <ul>
039 * <li><b>value</b> (deprecated since 1.1): definition of the value that will be taken into account
040 * <ul>
041 * <li><code>attribute</code> (mandatory): the attribute's name
 * <li><code>regexp</code>: use a regexp to extract the value taken into account for the first attribute
043 * <li><code>regexp.group</code> (default: -1): which group to extract? If negative, all groups will be concatenated
044 * <li><code>attribute2</code> (default: none): second attribute to append
045 * <li><code>attribute3</code> (default: none): third attribute to append
046 * </ul>
047 * </li>
048 * <li><b>value</b>: definition of the value that will be taken into account
049 * <ul>
050 * <li><code>attributes</code> (mandatory): the attributes' names, comma separated (no count limit)
051 * </li>
052 * </ul>
053 * <li><b>sorting options</b>
054 * <ul>
055 * <li><code>sortItem</code> (default: <code>count</code>): sort on which item? <code>count</code>, <code>value</code>,
056 * <code>valueLength</code>
 * <li><code>reverseOrder</code> (default: <code>false</code>): by default, most frequent values are displayed first
058 * </ul>
059 * </li>
060 * <li><b>reporting</b>
061 * <ul>
062 * <li><code>valueDescriptions</code>: description to insert in the report instead of corresponding value (in java
063 * properties format)
064 * <li><code>maxGlobalReport</code> (default: 5): maximum number of values inserted in global report
065 * <li><code>maxGroupReport</code> (default: 25): maximum number of values inserted in group report
066 * </ul>
067 * </li>
068 * </ul>
069 *
070 * @see SamplingPlugin
071 * @since 0.6
072 */
073public class FreqPlugin
074    extends LogDistillation.Plugin
075{
076    private static final long serialVersionUID = -5162563816094662936L;
077
078    public final static String ID = "freq";
079
080    public final static Plugin TYPE = new Type();
081
082    private final static String[] SORT_ITEM_VALUES = { "count", "value", "valueLength" };
083
084    private final static String[] SORT_ITEM_DESCRIPTIONS = { "count", "attribute's value", "attribute's value length" };
085
086    private final static FreqOrder[] SORT_ITEM_COMPARATORS =
087        { new CountOrder(), new ValueOrder(), new ValueLengthOrder() };
088
089    private final Map<String, Freq> freqs = new HashMap<String, Freq>();
090
091    protected SortedSet<Freq> sorted; // sorted Freq
092
093    /** @deprecated since 1.1, use attributes instead */
094    private final String attribute;
095
096    /** @since 0.8
097     * @deprecated since 1.1, use attributes instead */
098    private final String attribute2;
099
100    /** @since 0.8
101     * @deprecated since 1.1, use attributes instead */
102    private final String attribute3;
103
104    /** @deprecated since 1.1, use attributes instead */
105    private transient LogType.AttributeInfo attributeInfo;
106
107    /** @deprecated since 1.1, use attributes instead */
108    private transient LogType.AttributeInfo attributeInfo2;
109
110    /** @deprecated since 1.1, use attributes instead */
111    private transient LogType.AttributeInfo attributeInfo3;
112
113    private final int sortItem; // index in SORT_ITEM_VALUES
114
115    private final boolean reverseOrder;
116
117    private final Properties valueDescriptions = new Properties();
118
119    private final int maxGlobalReport;
120
121    private final int maxGroupReport;
122
123    /** @deprecated since 1.1, use attributes instead */
124    private final String regexpString;
125
126    /** @deprecated since 1.1, use attributes instead */
127    private transient Pattern regexp;
128
129    /** @deprecated since 1.1, use attributes instead */
130    private final int regexpGroup;
131
132    /** @since 1.1 */
133    protected final String[] attributes;
134
135    protected transient LogType.AttributeInfo[] attributesInfo;
136
137    public FreqPlugin( LogDistiller.Plugin definition )
138    {
139        this( definition, true );
140    }
141
142    protected FreqPlugin( LogDistiller.Plugin definition, boolean checkSampling )
143    {
144        super( definition );
145        LogDistiller.Group group = definition.getGroup();
146        String context = "(logdistiller/group[id='" + ( ( group == null ) ? "default-id" : group.getId() )
147            + "]/plugin[type='" + definition.getType() + "'])";
148
149        attribute = definition.getParam( "attribute" );
150        attribute2 = definition.getParam( "attribute2", null );
151        attribute3 = definition.getParam( "attribute3", null );
152        regexpString = definition.getParam( "regexp" );
153        updatePattern();
154        regexpGroup = Integer.parseInt( definition.getParam( "regexp.group", "-1" ) );
155        String attrs = definition.getParam( "attributes" );
156        attributes = ( attrs == null ) ? null : StringUtils.split( attrs, ", " );
157
158        String attr = definition.getParam( "sortItem" );
159        int sortItem_ = ( attr == null ) ? 0 : Arrays.binarySearch( SORT_ITEM_VALUES, attr );
160        if ( sortItem_ < 0 )
161        {
162            LogDistiller ld = group.getLogdistiller();
163            ld.addWarning( context + " unsupported '" + attr
164                + "' sortItem value: choose one of [ 'count', 'value', 'valueLength' ], defaulting to 'count'" );
165            sortItem_ = 0;
166        }
167        sortItem = sortItem_;
168
169        attr = definition.getParam( "reverseOrder", "false" );
170        reverseOrder = Boolean.valueOf( attr ).booleanValue();
171        attr = definition.getParam( "valueDescriptions" );
172        if ( attr != null )
173        {
174            try
175            {
176                valueDescriptions.load( new ByteArrayInputStream( attr.getBytes( "ISO-8859-1" ) ) );
177            }
178            catch ( IOException ioe )
179            {
180                ioe.printStackTrace();
181            }
182        }
183        maxGlobalReport = Integer.parseInt( definition.getParam( "maxGlobalReport", "5" ) );
184        maxGroupReport = Integer.parseInt( definition.getParam( "maxGroupReport", "25" ) );
185
186        if ( checkSampling
187            && ( ( definition.getParam( "sampling.maxCount" ) != null )
188                || ( definition.getParam( "sampling.maxSize" ) != null )
189                || ( definition.getParam( "sampling.filename" ) != null ) ) )
190        {
191            LogDistiller ld = group.getLogdistiller();
192            ld.addWarning( context + " sampling features (sampling.* parameters) disabled from freq plugin: "
193                           + "moved to sampling plugin since LogDistiller 0.9" );
194        }
195
196        if ( attribute != null )
197        {
198            LogDistiller ld = group.getLogdistiller();
199            ld.addWarning( context + " 'attribute' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use 'attributes'" );
200            if ( regexp != null )
201            {
202                ld.addWarning( context + " 'regexp' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use extended attributes" );
203            }
204        }
205    }
206
207    private void readObject( ObjectInputStream in )
208        throws IOException, ClassNotFoundException
209    {
210        in.defaultReadObject();
211        updatePattern();
212    }
213
214    /** @deprecated since 1.1, use attributes instead */
215    private void updatePattern()
216    {
217        try
218        {
219            regexp = ( regexpString == null ) ? null : Pattern.compile( regexpString );
220        }
221        catch ( PatternSyntaxException pse )
222        {
223            throw new PluginConfigException( "regexp parameter contains bad pattern '" + regexpString
224                + "': caused an exception " + pse.getMessage() );
225        }
226    }
227
228    public void begin( File destinationDirectory )
229        throws FileNotFoundException
230    {
231    }
232
233    public void addLogEvent( LogEvent logEvent )
234        throws IOException
235    {
236        String[] values = extractAttributes( logEvent );
237
238        String key = Freq.asKey( values );
239        Freq freq = (Freq) freqs.get( key );
240        if ( freq == null )
241        {
242            freq = new Freq( key, values );
243            freqs.put( key, freq );
244        }
245
246        addLogEventToFreq( logEvent, freq );
247    }
248
249    private String[] extractAttributes( LogEvent logEvent )
250    {
251        if ( attributes == null )
252        {
253            return new String[]{ oldComputeValue( logEvent ) };
254        }
255
256        // initialize attributesInfo if necessary
257        if ( attributesInfo == null )
258        {
259            attributesInfo = new LogType.AttributeInfo[attributes.length];
260            LogType.Description description = logEvent.getFactory().getDescription();
261
262            for ( int i = 0; i < attributes.length; i++ )
263            {
264                attributesInfo[i] = description.getAttributeInfo( attributes[i] );
265            }
266        }
267
268        String[] values = new String[attributesInfo.length];
269        for ( int i = 0; i < attributesInfo.length; i++ )
270        {
271            values[i] = logEvent.getValue( attributesInfo[i] );
272        }
273        return values;
274    }
275
276    /** @deprecated since 1.1, use attributes instead */
277    private String oldComputeValue( LogEvent logEvent )
278    {
279        // initialize attributeInfo if necessary
280        if ( attributeInfo == null )
281        {
282            LogType.Description description = logEvent.getFactory().getDescription();
283            attributeInfo = description.getAttributeInfo( attribute );
284            if ( StringUtils.isNotEmpty( attribute2 ) )
285            {
286                attributeInfo2 = description.getAttributeInfo( attribute2 );
287                if ( StringUtils.isNotEmpty( attribute3 ) )
288                {
289                    attributeInfo3 = description.getAttributeInfo( attribute3 );
290                }
291            }
292        }
293
294        // calculate freq value
295        String value = logEvent.getValue( attributeInfo );
296        if ( attributeInfo2 != null )
297        {
298            value += '-' + logEvent.getValue( attributeInfo2 );
299            if ( attributeInfo3 != null )
300            {
301                value += '-' + logEvent.getValue( attributeInfo3 );
302            }
303        }
304        if ( regexp != null )
305        {
306            Matcher matcher = regexp.matcher( value );
307            if ( matcher.find() && ( regexpGroup <= matcher.groupCount() ) )
308            {
309                if ( regexpGroup >= 0 )
310                {
311                    value = matcher.group( regexpGroup );
312                }
313                else if ( matcher.groupCount() > 1 )
314                {
315                    StringBuffer buff = new StringBuffer( matcher.group( 1 ) );
316                    for ( int i = 2; i <= matcher.groupCount(); i++ )
317                    {
318                        buff.append( '-' ).append( matcher.group( i ) );
319                    }
320                    value = buff.toString();
321                }
322                else if ( matcher.groupCount() == 1 )
323                {
324                    value = matcher.group( 1 );
325                }
326                else
327                {
328                    value = matcher.group( 0 );
329                }
330            }
331            else
332            {
333                value = "";
334            }
335            if ( value == null )
336            {
337                value = "";
338            }
339        }
340
341        return value;
342    }
343
344    /**
345     * @since 1.1
346     */
347    protected void addLogEventToFreq( LogEvent logEvent, Freq freq )
348        throws IOException
349    {
350        // increase freq count
351        freq.count++;
352        int bytes = logEvent.getRawLog().length() + LogDistillation.LINE_SEPARATOR.length();
353        freq.bytes += bytes;
354    }
355
356    public void end()
357        throws IOException
358    {
359        FreqOrder comparator = SORT_ITEM_COMPARATORS[sortItem];
360        comparator = reverseOrder ? new ReverseOrder( comparator ) : comparator;
361        sorted = new TreeSet<Freq>( comparator );
362        sorted.addAll( freqs.values() );
363    }
364
365    public void appendGroupReport( ReportFormat.PluginReport report )
366    {
367        appendReport( report, maxGroupReport );
368    }
369
370    public void appendGlobalReport( ReportFormat.PluginReport report )
371    {
372        appendReport( report, maxGlobalReport );
373    }
374
375    SortedSet<Freq> getSorted()
376    {
377        return sorted;
378    }
379
380    private String describeResult( int maxCount )
381    {
382        String count =
383            ( ( maxCount < 0 ) || ( maxCount >= sorted.size() ) ) ? String.valueOf( sorted.size() )
384                            : ( maxCount + "/" + sorted.size() );
385        String attrs;
386        if ( attributes == null )
387        {
388            attrs = attribute
389                + ( ( attribute2 == null ) ? "" :
390                    ( "'-'" + attribute2 + ( ( attribute3 == null ) ? "" : ( "'-'" + attribute3 ) ) ) );
391        }
392        else
393        {
394            attrs = StringUtils.join( attributes, "'-'" );
395        }
396        return "values of " + ( regexpString == null ? "" : "regexp on " ) + "attribute '" + attrs + "'"
397            + " sorted by" + ( reverseOrder ? " reverse " : " " ) + SORT_ITEM_DESCRIPTIONS[sortItem] + " (" + count
398            + " different values)";
399    }
400
401    private void addItems( ReportFormat.PluginReport report, int maxCount )
402    {
403        Iterator<Freq> iter = sorted.iterator();
404        while ( iter.hasNext() && ( ( maxCount < 0 ) || ( maxCount-- > 0 ) ) )
405        {
406            Freq freq = iter.next();
407
408            // value displayed in the report
409            String value = ( freq.values == null ) ? freq.key : StringUtils.join( freq.values, '-' );
410
411            report.addItem( freq.count, valueDescriptions.getProperty( value, value ) );
412        }
413    }
414
415    private void appendReport( ReportFormat.PluginReport report, int maxCount )
416    {
417        report.beginPluginReport( this, describeResult( maxCount ) );
418        appendLinkToFreqReport( report );
419        addItems( report, maxCount );
420        report.endPluginReport();
421    }
422
423    /**
424     * Append links to freq reports, for freq extensions.
425     *
426     * @param report the report formatter
427     */
428    protected void appendLinkToFreqReport( ReportFormat.PluginReport report )
429    {
430        // no additionnal link for freq: it's intended for subclasses
431    }
432
433    public static class Freq
434        implements Serializable
435    {
436        private static final long serialVersionUID = 9195786567021952968L;
437
438        public final String key;
439
440        public final String[] values;
441
442        public int count = 0;
443
444        public long bytes = 0;
445
446        public Freq( String key, String[] values )
447        {
448            this.key = key;
449            this.values = values;
450
451            // get values Strings as substring from key, to avoid keeping a pointer to the initial log String
452            int index = 0;
453            for ( int i = 0; i < values.length; i++ )
454            {
455                int start = index;
456                index += values[i].length();
457                values[i] = key.substring( start, index );
458                index++;
459            }
460        }
461
462        public static String asKey( String[] values )
463        {
464            return StringUtils.join( values, '\n' );
465        }
466    }
467
468    private abstract static class FreqOrder
469        implements Comparator<Freq>, Serializable
470    {
471        private static final long serialVersionUID = -2574387085671510158L;
472    }
473
474    /**
475     * Order on freq count, then on key when count are equal.
476     */
477    private static class CountOrder
478        extends FreqOrder
479    {
480        private static final long serialVersionUID = -933179294932120994L;
481
482        public int compare( Freq freq1, Freq freq2 )
483        {
484            int diff = freq2.count - freq1.count;
485            return ( diff == 0 ) ? freq1.key.compareTo( freq2.key ) : diff;
486        }
487    }
488
489    /**
490     * Order on key (lexical order).
491     */
492    private static class ValueOrder
493        extends FreqOrder
494    {
495        private static final long serialVersionUID = 1824757961838977443L;
496
497        public int compare( Freq freq1, Freq freq2 )
498        {
499            return freq1.key.compareTo( freq2.key );
500        }
501    }
502
503    /**
504     * Order on key length, then key lexical order.
505     * example: 1234 &gt; 234 &gt; 233 &gt; A5 &gt; 45
506     */
507    private static class ValueLengthOrder
508        extends FreqOrder
509    {
510        private static final long serialVersionUID = 651766724239811116L;
511
512        public int compare( Freq freq1, Freq freq2 )
513        {
514            int diff = freq2.key.length() - freq1.key.length();
515            return ( diff == 0 ) ? freq2.key.compareTo( freq1.key ) : diff;
516        }
517    }
518
519    private static class ReverseOrder
520        extends FreqOrder
521    {
522        private static final long serialVersionUID = 539304109704333616L;
523
524        private final FreqOrder comparator;
525
526        public ReverseOrder( FreqOrder comparator )
527        {
528            this.comparator = comparator;
529        }
530
531        public int compare( Freq freq1, Freq freq2 )
532        {
533            return comparator.compare( freq2, freq1 );
534        }
535    }
536
537    private static class Type
538        extends Plugin
539    {
540        public Type()
541        {
542            super( ID );
543        }
544
545        public LogDistillation.Plugin newInstance( LogDistiller.Plugin conf )
546        {
547            return new FreqPlugin( conf );
548        }
549    }
550}