001    package net.sf.logdistiller.plugins;
002    
003    /*
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     *
008     *     http://www.apache.org/licenses/LICENSE-2.0
009     *
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    import java.io.*;
018    import java.util.*;
019    import java.util.regex.Matcher;
020    import java.util.regex.Pattern;
021    import java.util.regex.PatternSyntaxException;
022    
023    import org.apache.commons.lang.StringUtils;
024    
025    import net.sf.logdistiller.LogDistillation;
026    import net.sf.logdistiller.LogDistiller;
027    import net.sf.logdistiller.LogEvent;
028    import net.sf.logdistiller.LogType;
029    import net.sf.logdistiller.Plugin;
030    import net.sf.logdistiller.PluginConfigException;
031    import net.sf.logdistiller.ReportFormat;
032    
033    /**
034     * Counts the frequency of each attribute's value.
035     * <p>
036     * Parameters:
037     * </p>
038     * <ul>
039     * <li><b>value</b> (deprecated since 1.1): definition of the value that will be taken into account
040     * <ul>
041     * <li><code>attribute</code> (mandatory): the attribute's name
042     * <li><code>regexp</code>: use a regexp to extract value taken in account for the first attribute
043     * <li><code>regexp.group</code> (default: -1): which group to extract? If negative, all groups will be concatenated
044     * <li><code>attribute2</code> (default: none): second attribute to append
045     * <li><code>attribute3</code> (default: none): third attribute to append
046     * </ul>
047     * </li>
048     * <li><b>value</b>: definition of the value that will be taken into account
049     * <ul>
050     * <li><code>attributes</code> (mandatory): the attributes' names, comma separated (no count limit)
051     * </li>
052     * </ul>
053     * <li><b>sorting options</b>
054     * <ul>
055     * <li><code>sortItem</code> (default: <code>count</code>): sort on which item? <code>count</code>, <code>value</code>,
056     * <code>valueLength</code>
057     * <li><code>reverseOrder</code> (default: <code>false</code>): by default, most frequent value are displayed first
058     * </ul>
059     * </li>
060     * <li><b>reporting</b>
061     * <ul>
062     * <li><code>valueDescriptions</code>: description to insert in the report instead of corresponding value (in java
063     * properties format)
064     * <li><code>maxGlobalReport</code> (default: 5): maximum number of values inserted in global report
065     * <li><code>maxGroupReport</code> (default: 25): maximum number of values inserted in group report
066     * </ul>
067     * </li>
068     * </ul>
069     *
070     * @see SamplingPlugin
071     * @since 0.6
072     */
073    public class FreqPlugin
074        extends LogDistillation.Plugin
075    {
076        private static final long serialVersionUID = -5162563816094662936L;
077    
078        public final static String ID = "freq";
079    
080        public final static Plugin TYPE = new Type();
081    
082        private final static String[] SORT_ITEM_VALUES = { "count", "value", "valueLength" };
083    
084        private final static String[] SORT_ITEM_DESCRIPTIONS = { "count", "attribute's value", "attribute's value length" };
085    
086        private final static FreqOrder[] SORT_ITEM_COMPARATORS =
087            { new CountOrder(), new ValueOrder(), new ValueLengthOrder() };
088    
089        private final Map freqs = new HashMap(); // key = attribute's value (String), value = Freq instance
090    
091        protected SortedSet sorted; // sorted Freq
092    
093        /** @deprecated since 1.1, use attributes instead */
094        private final String attribute;
095    
096        /** @since 0.8
097         * @deprecated since 1.1, use attributes instead */
098        private final String attribute2;
099    
100        /** @since 0.8
101         * @deprecated since 1.1, use attributes instead */
102        private final String attribute3;
103    
104        /** @deprecated since 1.1, use attributes instead */
105        private transient LogType.AttributeInfo attributeInfo;
106    
107        /** @deprecated since 1.1, use attributes instead */
108        private transient LogType.AttributeInfo attributeInfo2;
109    
110        /** @deprecated since 1.1, use attributes instead */
111        private transient LogType.AttributeInfo attributeInfo3;
112    
113        private final int sortItem; // index in SORT_ITEM_VALUES
114    
115        private final boolean reverseOrder;
116    
117        private final Properties valueDescriptions = new Properties();
118    
119        private final int maxGlobalReport;
120    
121        private final int maxGroupReport;
122    
123        /** @deprecated since 1.1, use attributes instead */
124        private final String regexpString;
125    
126        /** @deprecated since 1.1, use attributes instead */
127        private transient Pattern regexp;
128    
129        /** @deprecated since 1.1, use attributes instead */
130        private final int regexpGroup;
131    
132        /** @since 1.1 */
133        protected final String[] attributes;
134    
135        protected transient LogType.AttributeInfo[] attributesInfo;
136    
137        public FreqPlugin( LogDistiller.Plugin definition )
138        {
139            this( definition, true );
140        }
141    
142        protected FreqPlugin( LogDistiller.Plugin definition, boolean checkSampling )
143        {
144            super( definition );
145            attribute = definition.getParam( "attribute" );
146            attribute2 = definition.getParam( "attribute2", null );
147            attribute3 = definition.getParam( "attribute3", null );
148            regexpString = definition.getParam( "regexp" );
149            updatePattern();
150            regexpGroup = Integer.parseInt( definition.getParam( "regexp.group", "-1" ) );
151            String attrs = definition.getParam( "attributes" );
152            attributes = ( attrs == null ) ? null : StringUtils.split( attrs, ", " );
153    
154            String attr = definition.getParam( "sortItem" );
155            sortItem = ( attr == null ) ? 0 : Arrays.binarySearch( SORT_ITEM_VALUES, attr );
156            attr = definition.getParam( "reverseOrder", "false" );
157            reverseOrder = Boolean.valueOf( attr ).booleanValue();
158            attr = definition.getParam( "valueDescriptions" );
159            if ( attr != null )
160            {
161                try
162                {
163                    valueDescriptions.load( new ByteArrayInputStream( attr.getBytes( "ISO-8859-1" ) ) );
164                }
165                catch ( IOException ioe )
166                {
167                    ioe.printStackTrace();
168                }
169            }
170            maxGlobalReport = Integer.parseInt( definition.getParam( "maxGlobalReport", "5" ) );
171            maxGroupReport = Integer.parseInt( definition.getParam( "maxGroupReport", "25" ) );
172    
173            if ( checkSampling
174                && ( ( definition.getParam( "sampling.maxCount" ) != null )
175                    || ( definition.getParam( "sampling.maxSize" ) != null ) || ( definition.getParam( "sampling.filename" ) != null ) ) )
176            {
177                LogDistiller ld = definition.getGroup().getLogdistiller();
178                ld.addWarning( "sampling features (sampling.* parameters) disabled from freq plugin: "
179                               + "moved to sampling plugin in LogDistiller 0.9" );
180            }
181    
182            if ( attribute != null )
183            {
184                LogDistiller ld = definition.getGroup().getLogdistiller();
185                ld.addWarning( "'attribute' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use 'attributes'" );
186                if ( regexp != null )
187                {
188                    ld.addWarning( "'regexp' parameter deprecated in " + getId() + " plugin since LogDistiller 1.1: use extended attributes" );
189                }
190            }
191        }
192    
193        private void readObject( ObjectInputStream in )
194            throws IOException, ClassNotFoundException
195        {
196            in.defaultReadObject();
197            updatePattern();
198        }
199    
200        /** @deprecated since 1.1, use attributes instead */
201        private void updatePattern()
202        {
203            try
204            {
205                regexp = ( regexpString == null ) ? null : Pattern.compile( regexpString );
206            }
207            catch ( PatternSyntaxException pse )
208            {
209                throw new PluginConfigException( "regexp parameter contains bad pattern '" + regexpString
210                    + "': caused an exception " + pse.getMessage() );
211            }
212        }
213    
214        public void begin( File destinationDirectory )
215            throws FileNotFoundException
216        {
217        }
218    
219        public void addLogEvent( LogEvent logEvent )
220            throws IOException
221        {
222            String[] values = extractAttributes( logEvent );
223    
224            String key = Freq.asKey( values );
225            Freq freq = (Freq) freqs.get( key );
226            if ( freq == null )
227            {
228                freq = new Freq( key, values );
229                freqs.put( key, freq );
230            }
231    
232            addLogEventToFreq( logEvent, freq );
233        }
234    
235        private String[] extractAttributes( LogEvent logEvent )
236        {
237            if ( attributes == null )
238            {
239                return new String[]{ oldComputeValue( logEvent ) };
240            }
241    
242            // initialize attributesInfo if necessary
243            if ( attributesInfo == null )
244            {
245                attributesInfo = new LogType.AttributeInfo[attributes.length];
246                LogType.Description description = logEvent.getFactory().getDescription();
247    
248                for ( int i = 0; i < attributes.length; i++ )
249                {
250                    attributesInfo[i] = description.getAttributeInfo( attributes[i] );
251                }
252            }
253    
254            String[] values = new String[attributesInfo.length];
255            for ( int i = 0; i < attributesInfo.length; i++ )
256            {
257                values[i] = logEvent.getValue( attributesInfo[i] );
258            }
259            return values;
260        }
261    
262        /** @deprecated since 1.1, use attributes instead */
263        private String oldComputeValue( LogEvent logEvent )
264        {
265            // initialize attributeInfo if necessary
266            if ( attributeInfo == null )
267            {
268                LogType.Description description = logEvent.getFactory().getDescription();
269                attributeInfo = description.getAttributeInfo( attribute );
270                if ( StringUtils.isNotEmpty( attribute2 ) )
271                {
272                    attributeInfo2 = description.getAttributeInfo( attribute2 );
273                    if ( StringUtils.isNotEmpty( attribute3 ) )
274                    {
275                        attributeInfo3 = description.getAttributeInfo( attribute3 );
276                    }
277                }
278            }
279    
280            // calculate freq value
281            String value = logEvent.getValue( attributeInfo );
282            if ( attributeInfo2 != null )
283            {
284                value += '-' + logEvent.getValue( attributeInfo2 );
285                if ( attributeInfo3 != null )
286                {
287                    value += '-' + logEvent.getValue( attributeInfo3 );
288                }
289            }
290            if ( regexp != null )
291            {
292                Matcher matcher = regexp.matcher( value );
293                if ( matcher.find() && ( regexpGroup <= matcher.groupCount() ) )
294                {
295                    if ( regexpGroup >= 0 )
296                    {
297                        value = matcher.group( regexpGroup );
298                    }
299                    else if ( matcher.groupCount() > 1 )
300                    {
301                        StringBuffer buff = new StringBuffer( matcher.group( 1 ) );
302                        for ( int i = 2; i <= matcher.groupCount(); i++ )
303                        {
304                            buff.append( '-' ).append( matcher.group( i ) );
305                        }
306                        value = buff.toString();
307                    }
308                    else if ( matcher.groupCount() == 1 )
309                    {
310                        value = matcher.group( 1 );
311                    }
312                    else
313                    {
314                        value = matcher.group( 0 );
315                    }
316                }
317                else
318                {
319                    value = "";
320                }
321                if ( value == null )
322                {
323                    value = "";
324                }
325            }
326    
327            return value;
328        }
329    
330        /**
331         * @since 1.1
332         */
333        protected void addLogEventToFreq( LogEvent logEvent, Freq freq )
334            throws IOException
335        {
336            // increase freq count
337            freq.count++;
338            int bytes = logEvent.getRawLog().length() + LogDistillation.LINE_SEPARATOR.length();
339            freq.bytes += bytes;
340        }
341    
342        public void end()
343            throws IOException
344        {
345            FreqOrder comparator = SORT_ITEM_COMPARATORS[sortItem];
346            comparator = reverseOrder ? new ReverseOrder( comparator ) : comparator;
347            sorted = new TreeSet( comparator );
348            sorted.addAll( freqs.values() );
349        }
350    
351        public void appendGroupReport( ReportFormat.PluginReport report )
352        {
353            appendReport( report, maxGroupReport );
354        }
355    
356        public void appendGlobalReport( ReportFormat.PluginReport report )
357        {
358            appendReport( report, maxGlobalReport );
359        }
360    
361        SortedSet getSorted()
362        {
363            return sorted;
364        }
365    
366        private String describeResult( int maxCount )
367        {
368            String count =
369                ( ( maxCount < 0 ) || ( maxCount >= sorted.size() ) ) ? String.valueOf( sorted.size() )
370                                : ( maxCount + "/" + sorted.size() );
371            String attrs;
372            if ( attributes == null )
373            {
374                attrs = attribute
375                    + ( ( attribute2 == null ) ? "" :
376                        ( "'-'" + attribute2 + ( ( attribute3 == null ) ? "" : ( "'-'" + attribute3 ) ) ) );
377            }
378            else
379            {
380                attrs = StringUtils.join( attributes, "'-'" );
381            }
382            return "values of " + ( regexpString == null ? "" : "regexp on " ) + "attribute '" + attrs + "'"
383                + " sorted by" + ( reverseOrder ? " reverse " : " " ) + SORT_ITEM_DESCRIPTIONS[sortItem] + " (" + count
384                + " different values)";
385        }
386    
387        private void addItems( ReportFormat.PluginReport report, int maxCount )
388        {
389            Iterator iter = sorted.iterator();
390            while ( iter.hasNext() && ( ( maxCount < 0 ) || ( maxCount-- > 0 ) ) )
391            {
392                Freq freq = (Freq) iter.next();
393    
394                // value displayed in the report
395                String value = ( freq.values == null ) ? freq.key : StringUtils.join( freq.values, '-' );
396    
397                report.addItem( freq.count, valueDescriptions.getProperty( value, value ) );
398            }
399        }
400    
401        private void appendReport( ReportFormat.PluginReport report, int maxCount )
402        {
403            report.beginPluginReport( this, describeResult( maxCount ) );
404            appendLinkToFreqReport( report );
405            addItems( report, maxCount );
406            report.endPluginReport();
407        }
408    
409        /**
410         * Append links to freq reports, for freq extensions.
411         *
412         * @param report the report formatter
413         */
414        protected void appendLinkToFreqReport( ReportFormat.PluginReport report )
415        {
416            // no additionnal link for freq: it's intended for subclasses
417        }
418    
419        public static class Freq
420            implements Serializable
421        {
422            private static final long serialVersionUID = 9195786567021952968L;
423    
424            public final String key;
425    
426            public final String[] values;
427    
428            public int count = 0;
429    
430            public long bytes = 0;
431    
432            public Freq( String key, String[] values )
433            {
434                this.key = key;
435                this.values = values;
436    
437                // get values Strings as substring from key, to avoid keeping a pointer to the initial log String
438                int index = 0;
439                for ( int i = 0; i < values.length; i++ )
440                {
441                    int start = index;
442                    index += values[i].length();
443                    values[i] = key.substring( start, index );
444                    index++;
445                }
446            }
447    
448            public static String asKey( String[] values )
449            {
450                return StringUtils.join( values, '\n' );
451            }
452        }
453    
454        private abstract static class FreqOrder
455            implements Comparator, Serializable
456        {
457        }
458    
459        /**
460         * Order on freq count, then on key when count are equal.
461         */
462        private static class CountOrder
463            extends FreqOrder
464        {
465            private static final long serialVersionUID = -933179294932120994L;
466    
467            public int compare( Object o1, Object o2 )
468            {
469                Freq freq1 = (Freq) o1;
470                Freq freq2 = (Freq) o2;
471                int diff = freq2.count - freq1.count;
472                return ( diff == 0 ) ? freq1.key.compareTo( freq2.key ) : diff;
473            }
474        }
475    
476        /**
477         * Order on key (lexical order).
478         */
479        private static class ValueOrder
480            extends FreqOrder
481        {
482            private static final long serialVersionUID = 1824757961838977443L;
483    
484            public int compare( Object o1, Object o2 )
485            {
486                Freq freq1 = (Freq) o1;
487                Freq freq2 = (Freq) o2;
488                return freq1.key.compareTo( freq2.key );
489            }
490        }
491    
492        /**
493         * Order on key length, then key lexical order.
494         * example: 1234 &gt; 234 &gt; 233 &gt; A5 &gt; 45
495         */
496        private static class ValueLengthOrder
497            extends FreqOrder
498        {
499            private static final long serialVersionUID = 651766724239811116L;
500    
501            public int compare( Object o1, Object o2 )
502            {
503                Freq freq1 = (Freq) o1;
504                Freq freq2 = (Freq) o2;
505                int diff = freq2.key.length() - freq1.key.length();
506                return ( diff == 0 ) ? freq2.key.compareTo( freq1.key ) : diff;
507            }
508        }
509    
510        private static class ReverseOrder
511            extends FreqOrder
512        {
513            private static final long serialVersionUID = 539304109704333616L;
514    
515            private final FreqOrder comparator;
516    
517            public ReverseOrder( FreqOrder comparator )
518            {
519                this.comparator = comparator;
520            }
521    
522            public int compare( Object o1, Object o2 )
523            {
524                return comparator.compare( o2, o1 );
525            }
526        }
527    
528        private static class Type
529            extends Plugin
530        {
531            public Type()
532            {
533                super( ID );
534            }
535    
536            public LogDistillation.Plugin newInstance( LogDistiller.Plugin conf )
537            {
538                return new FreqPlugin( conf );
539            }
540        }
541    }