Note that there are some explanatory texts on larger screens.

plurals
  1. PO
    text
    copied!<p>The class below is named PorterAlgo and has various functions for stemming.</p>
<pre><code>package com.mycompany.algo;

import java.util.Locale;

/**
 * Mutable string holder. hasSuffix writes the candidate stem into str so the
 * caller can read it back after the call (an out-parameter).
 */
class NewString {
    public String str;

    NewString() {
        str = "";
    }
}

/**
 * Rule-based suffix stripper organised as five steps (step1 .. step5) plus a
 * prefix stripper. All rules operate on lower-case input; stripAffixes is the
 * public entry point and takes care of lower-casing and cleaning.
 */
public class PorterAlgo {

    /** Plain vowels; 'y' is handled separately in vowel(). */
    private static final String PLAIN_VOWELS = "aeiou";

    /** step2 rewrite table: { suffix, replacement }, tried in this order. */
    private static final String[][] STEP2_SUFFIXES = {
        { "ational", "ate" }, { "tional", "tion" }, { "enci", "ence" }, { "anci", "ance" },
        { "izer", "ize" }, { "iser", "ize" }, { "abli", "able" }, { "alli", "al" },
        { "entli", "ent" }, { "eli", "e" }, { "ousli", "ous" }, { "ization", "ize" },
        { "isation", "ize" }, { "ation", "ate" }, { "ator", "ate" }, { "alism", "al" },
        { "iveness", "ive" }, { "fulness", "ful" }, { "ousness", "ous" }, { "aliti", "al" },
        { "iviti", "ive" }, { "biliti", "ble" }
    };

    /** step3 rewrite table: { suffix, replacement }, tried in this order. */
    private static final String[][] STEP3_SUFFIXES = {
        { "icate", "ic" }, { "ative", "" }, { "alize", "al" }, { "alise", "al" },
        { "iciti", "ic" }, { "ical", "ic" }, { "ful", "" }, { "ness", "" }
    };

    /** step4 suffixes that are dropped outright, tried in this order. */
    private static final String[] STEP4_SUFFIXES = {
        "al", "ance", "ence", "er", "ic", "able", "ible", "ant", "ement", "ment", "ent",
        "sion", "tion", "ou", "ism", "ate", "iti", "ous", "ive", "ize", "ise"
    };

    /** Prefixes removed by stripPrefixes, tried in this order. */
    private static final String[] PREFIXES = {
        "kilo", "micro", "milli", "intra", "ultra", "mega", "nano", "pico", "pseudo"
    };

    /**
     * Returns str with every character that is not a letter or digit removed.
     * An empty input yields an empty result (the previous version threw
     * StringIndexOutOfBoundsException because it read charAt(0) unconditionally).
     */
    String Clean(String str) {
        StringBuilder kept = new StringBuilder(str.length());
        for (int i = 0; i &lt; str.length(); i++) {
            char ch = str.charAt(i);
            if (Character.isLetterOrDigit(ch)) {
                kept.append(ch);
            }
        }
        return kept.toString();
    }

    /**
     * Tests whether word ends with suffix and is strictly longer than it.
     *
     * @param word   the word to inspect
     * @param suffix the candidate suffix
     * @param stem   receives word minus the last suffix.length() characters
     * @return true if word is longer than suffix and ends with it
     */
    boolean hasSuffix(String word, String suffix, NewString stem) {
        if (word.length() &lt;= suffix.length()) {
            return false;
        }
        // Cheap early exit on the second-to-last character. When it fires, stem is
        // left untouched; otherwise stem is written even if the full comparison
        // below fails. Callers only read stem after a true result.
        if (suffix.length() &gt; 1
                &amp;&amp; word.charAt(word.length() - 2) != suffix.charAt(suffix.length() - 2)) {
            return false;
        }
        stem.str = word.substring(0, word.length() - suffix.length());
        return word.endsWith(suffix);
    }

    /**
     * Returns true if ch counts as a vowel: a, e, i, o, u always, and 'y' only
     * when the preceding character prev is not one of a, e, i, o, u.
     */
    boolean vowel(char ch, char prev) {
        if (isPlainVowel(ch)) {
            return true;
        }
        return ch == 'y' &amp;&amp; !isPlainVowel(prev);
    }

    /** True for a, e, i, o, u. */
    private static boolean isPlainVowel(char ch) {
        return PLAIN_VOWELS.indexOf(ch) &gt;= 0;
    }

    /**
     * vowel() for the character at index i of s. For index 0 the "previous"
     * character is taken to be 'a', so a leading 'y' is not a vowel here.
     */
    private boolean vowelAt(String s, int i) {
        char prev = (i &gt; 0) ? s.charAt(i - 1) : 'a';
        return vowel(s.charAt(i), prev);
    }

    /**
     * Returns the measure of stem: the number of times a run of vowels is
     * followed by at least one further character while scanning left to right.
     */
    int measure(String stem) {
        int i = 0;
        int count = 0;
        int length = stem.length();
        while (i &lt; length) {
            // Skip leading non-vowels up to the next vowel.
            while (i &lt; length &amp;&amp; !vowelAt(stem, i)) {
                i++;
            }
            // Step past that vowel and the rest of the vowel run.
            i++;
            while (i &lt; length &amp;&amp; vowelAt(stem, i)) {
                i++;
            }
            // A character remains after the vowel run: one more unit.
            if (i &lt; length) {
                count++;
                i++;
            }
        }
        return count;
    }

    /** Returns true if any character of word is a vowel according to vowel(). */
    boolean containsVowel(String word) {
        for (int i = 0; i &lt; word.length(); i++) {
            if (vowelAt(word, i)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns true if str ends in non-vowel, vowel, non-vowel where the final
     * character is not w, x or y. Strings shorter than three characters never match.
     */
    boolean cvc(String str) {
        int length = str.length();
        if (length &lt; 3) {
            return false;
        }
        char last = str.charAt(length - 1);
        boolean tailMatches = !vowel(last, str.charAt(length - 2))
                &amp;&amp; last != 'w' &amp;&amp; last != 'x' &amp;&amp; last != 'y'
                &amp;&amp; vowel(str.charAt(length - 2), str.charAt(length - 3));
        if (!tailMatches) {
            return false;
        }
        if (length == 3) {
            // No previous character exists; '?' makes a leading 'y' count as a vowel
            // here (unlike measure/containsVowel, which use 'a').
            return !vowel(str.charAt(0), '?');
        }
        return !vowel(str.charAt(length - 3), str.charAt(length - 4));
    }

    /**
     * Step 1: plural endings (sses, ies, s), past/progressive endings (eed, ed,
     * ing) with the follow-up repairs, and a final y to i rewrite.
     * An empty input is returned unchanged (previously it threw).
     */
    String step1(String str) {
        if (str.isEmpty()) {
            return str;
        }
        NewString stem = new NewString();

        if (str.charAt(str.length() - 1) == 's') {
            if (hasSuffix(str, "sses", stem) || hasSuffix(str, "ies", stem)) {
                str = str.substring(0, str.length() - 2);
            } else {
                if (str.length() == 1) {
                    // The whole word is "s".
                    return "";
                }
                if (str.charAt(str.length() - 2) != 's') {
                    str = str.substring(0, str.length() - 1);
                }
            }
        }

        if (hasSuffix(str, "eed", stem)) {
            if (measure(stem.str) &gt; 0) {
                str = str.substring(0, str.length() - 1);
            }
        } else if (hasSuffix(str, "ed", stem) || hasSuffix(str, "ing", stem)) {
            if (containsVowel(stem.str)) {
                str = str.substring(0, stem.str.length());
                if (str.length() == 1) {
                    return str;
                }
                if (hasSuffix(str, "at", stem) || hasSuffix(str, "bl", stem)
                        || hasSuffix(str, "iz", stem)) {
                    str += "e";
                } else {
                    int length = str.length();
                    char last = str.charAt(length - 1);
                    if (last == str.charAt(length - 2)
                            &amp;&amp; last != 'l' &amp;&amp; last != 's' &amp;&amp; last != 'z') {
                        // Undouble a doubled final letter.
                        str = str.substring(0, length - 1);
                    } else if (measure(str) == 1 &amp;&amp; cvc(str)) {
                        str += "e";
                    }
                }
            }
        }

        if (hasSuffix(str, "y", stem) &amp;&amp; containsVowel(stem.str)) {
            str = str.substring(0, str.length() - 1) + "i";
        }
        return str;
    }

    /** Step 2: applies the first STEP2_SUFFIXES row whose stem has measure above 0. */
    String step2(String str) {
        return rewriteFirstMatch(str, STEP2_SUFFIXES);
    }

    /** Step 3: applies the first STEP3_SUFFIXES row whose stem has measure above 0. */
    String step3(String str) {
        return rewriteFirstMatch(str, STEP3_SUFFIXES);
    }

    /**
     * Walks table in order; for the first row whose suffix matches str and whose
     * stem has measure above 0, returns stem + replacement. Rows that match but
     * fail the measure test are skipped and the search continues.
     */
    private String rewriteFirstMatch(String str, String[][] table) {
        NewString stem = new NewString();
        for (String[] row : table) {
            if (hasSuffix(str, row[0], stem) &amp;&amp; measure(stem.str) &gt; 0) {
                return stem.str + row[1];
            }
        }
        return str;
    }

    /** Step 4: drops the first STEP4_SUFFIXES entry whose stem has measure above 1. */
    String step4(String str) {
        NewString stem = new NewString();
        for (String suffix : STEP4_SUFFIXES) {
            if (hasSuffix(str, suffix, stem) &amp;&amp; measure(stem.str) &gt; 1) {
                return stem.str;
            }
        }
        return str;
    }

    /**
     * Step 5: removes a final e (subject to the measure and cvc tests) and
     * reduces a final double l when the measure is above 1.
     * An empty input is returned unchanged (previously it threw).
     */
    String step5(String str) {
        if (str.isEmpty()) {
            return str;
        }
        if (str.charAt(str.length() - 1) == 'e') {
            // measure(str) == measure(stem) when the word ends in a vowel.
            int m = measure(str);
            String withoutE = str.substring(0, str.length() - 1);
            if (m &gt; 1) {
                str = withoutE;
            } else if (m == 1 &amp;&amp; !cvc(withoutE)) {
                str = withoutE;
            }
        }
        if (str.length() == 1) {
            return str;
        }
        if (str.charAt(str.length() - 1) == 'l'
                &amp;&amp; str.charAt(str.length() - 2) == 'l'
                &amp;&amp; measure(str) &gt; 1) {
            str = str.substring(0, str.length() - 1);
        }
        return str;
    }

    /** Removes the first entry of PREFIXES that str starts with, if any. */
    String stripPrefixes(String str) {
        for (String prefix : PREFIXES) {
            if (str.startsWith(prefix)) {
                return str.substring(prefix.length());
            }
        }
        return str;
    }

    /** Runs step1 .. step5 in order, stopping early once the word is empty. */
    private String stripSuffixes(String str) {
        str = step1(str);
        if (!str.isEmpty()) {
            str = step2(str);
        }
        if (!str.isEmpty()) {
            str = step3(str);
        }
        if (!str.isEmpty()) {
            str = step4(str);
        }
        if (!str.isEmpty()) {
            str = step5(str);
        }
        return str;
    }

    /**
     * Lower-cases and cleans str, then strips prefixes and suffixes. Words of
     * two characters or fewer (after cleaning) are returned without stemming.
     *
     * @param str the raw token; must not be null
     * @return the stemmed token, possibly empty
     */
    public String stripAffixes(String str) {
        // Locale.ROOT keeps lower-casing independent of the default locale.
        str = Clean(str.toLowerCase(Locale.ROOT));
        if (str.length() &gt; 2) {
            str = stripPrefixes(str);
            // isEmpty() instead of the former reference comparison (str != "").
            if (!str.isEmpty()) {
                str = stripSuffixes(str);
            }
        }
        return str;
    }
}
</code></pre> <p>Given below is a class PorterCheck.java</p> <pre><code>package com.mycompany.algo;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;

/** Small command-line driver that exercises PorterAlgo. */
public class PorterCheck {

    private static final String DEFAULT_TEST_FILE = "C:/Users/vaibhav/Desktop/rev.txt";

    /**
     * Prints a few PorterAlgo results, then stems the tokens of a file.
     *
     * @param args optional; args[0] is the file to tokenize, otherwise DEFAULT_TEST_FILE
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        PorterAlgo pa = new PorterAlgo();

        // checks for vowels in a given string
        System.out.println(pa.containsVowel("vaibhav"));

        // removes special characters
        System.out.println(pa.Clean("vaibhav's book"));

        // check for a given suffix
        NewString stem = new NewString();
        System.out.println(pa.hasSuffix("corresponding", "ing", stem));

        // stemming the words
        String[] tokens = { "normalize", "technical", "education" };
        List&lt;String&gt; tok = new ArrayList&lt;String&gt;(Arrays.asList(tokens));
        System.out.println(completeStem(tok));

        String fileName = (args.length &gt; 0) ? args[0] : DEFAULT_TEST_FILE;
        FileReader fileReader = new FileReader(new File(fileName));
        try {
            // NOTE(review): FileTokenizer is not defined in this snippet; presumably a
            // project class whose tokenize(Reader) returns the file's tokens - verify.
            FileTokenizer fileTokenizer = new FileTokenizer();
            List&lt;String&gt; tokens1 = fileTokenizer.tokenize(fileReader);
            System.out.println("Tokenizing the input file:");
            System.out.print(completeStem(tokens1));
        } finally {
            // The reader was previously never closed.
            fileReader.close();
        }
    }

    /**
     * Runs step1 .. step5 on every token and collects the results in order.
     * Tokens are passed to the steps as given: no lower-casing, cleaning or
     * prefix stripping is applied here.
     */
    public static ArrayList&lt;String&gt; completeStem(List&lt;String&gt; tokens1) {
        PorterAlgo pa = new PorterAlgo();
        ArrayList&lt;String&gt; stemmed = new ArrayList&lt;String&gt;(tokens1.size());
        for (String token : tokens1) {
            String s = pa.step1(token);
            s = pa.step2(s);
            s = pa.step3(s);
            s = pa.step4(s);
            s = pa.step5(s);
            stemmed.add(s);
        }
        return stemmed;
    }

    /** Splits the fixed sample sentence "this is a book" on whitespace. */
    public static ArrayList&lt;String&gt; fileTokenizer() {
        StringTokenizer strtoken = new StringTokenizer("this is a book");
        ArrayList&lt;String&gt; filetoken = new ArrayList&lt;String&gt;();
        while (strtoken.hasMoreTokens()) {
            filetoken.add(strtoken.nextToken());
        }
        return filetoken;
    }
}
</code></pre> <p>Hope this helps you :D</p>
 

Querying!

 
Guidance

SQuiL has stopped working due to an internal error.

If you are curious you may find further information in the browser console, which is accessible through the devtools (F12).

Reload