import java.io.*;
import java.text.*;
import java.util.*;

/**
 * FileSentenceReader - splits the text of a file into sentences using
 * {@link java.text.BreakIterator}, so that sentence boundaries follow the
 * rules of the default locale rather than ad-hoc punctuation matching.
 * Created 12/03, last modified 09/07.
 *
 * @author Tony Dahlman
 */
public class FileSentenceReader {

    /** Number of characters requested from the reader per read call. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Reads a text file and parses it into sentences with the default-locale
     * sentence {@link BreakIterator}.
     *
     * <p>The whole file is read before it is segmented, so a sentence is never
     * cut in two by an internal buffer boundary. Each returned sentence keeps
     * the whitespace that follows it in the file, except that whitespace-only
     * segments at the very end of the text are dropped and the final sentence
     * is trimmed (this removes the CR/LF that usually ends a text file).
     *
     * <p>The file is decoded with the platform default charset, as
     * {@link FileReader} does.
     *
     * @param f the text file to read
     * @return an {@code ArrayList} of {@code String} sentences in file order;
     *         empty if the file is empty, contains only whitespace, or could
     *         not be read (in which case the stack trace of the
     *         {@code IOException} is printed to standard error)
     */
    public static ArrayList getAllSentences( File f ) {
        ArrayList aL = new ArrayList();

        String text;
        try {
            text = readAll( f );
        } catch( IOException ioe ) {
            ioe.printStackTrace();
            return aL;
        }

        // Segment the complete text in one pass.
        BreakIterator sIter = BreakIterator.getSentenceInstance();
        sIter.setText( text );
        int start = sIter.first();
        for( int end = sIter.next();
             end != BreakIterator.DONE;
             start = end, end = sIter.next() ) {
            aL.add( text.substring( start, end ) );
        }

        // Eliminate the CR/LF baggage at the end of the text: discard trailing
        // segments that are only whitespace, then trim the last real sentence.
        while( !aL.isEmpty() ) {
            String last = ( (String) aL.remove( aL.size() - 1 ) ).trim();
            if( last.length() > 0 ) {
                aL.add( last );
                break;
            }
        }
        return aL;
    }

    /**
     * Reads the complete contents of a file into a string.
     * The reader is closed even when a read fails.
     */
    private static String readAll( File f ) throws IOException {
        StringBuffer sbuf = new StringBuffer( BUFFER_SIZE );
        BufferedReader ins = new BufferedReader( new FileReader( f ) );
        try {
            char[] chunk = new char[BUFFER_SIZE];
            int chCount;
            // read() may return fewer chars than requested before EOF;
            // only -1 signals the end of the stream.
            while( ( chCount = ins.read( chunk, 0, chunk.length ) ) != -1 ) {
                // Append only what was actually read, never the stale or
                // zero-filled tail of the array.
                sbuf.append( chunk, 0, chCount );
            }
        } finally {
            ins.close(); // always close your streams!
        }
        return sbuf.toString();
    }

    /**
     * Test code: prints the sentences found in the file named by the single
     * command-line argument. Exits silently when the argument count is not
     * one, or the path does not exist, is a directory, or is not readable.
     */
    public static void main( String[] args ) {
        if( args.length != 1 ) {
            return;
        }
        File f = new File( args[0] );
        if( !f.exists() ) {
            return;
        }
        if( f.isDirectory() || !f.canRead() ) {
            return;
        }

        ArrayList aL = FileSentenceReader.getAllSentences( f );

        System.out.println("Found " + aL.size() + " sentences in file " + f );
        Iterator i = aL.iterator();
        while( i.hasNext() ) {
            System.out.println( (String) i.next() );
        }
    }
}