|
/*
 * New I/O Functionality for Java(TM) 2 Standard Edition 1.4 (6)
 *
 * There is also a shortcut for matching,
 *
 *     boolean b = Pattern.matches(".*\r?\n", aString);
 *
 * but it isn't efficient when you need to recheck for matches, because the
 * Pattern is not compiled once and kept for reuse.
 *
 * To combine all the previously mentioned skills, the following example
 * performs a word/line count on the file passed into the program:
 */
import java.io.*;
import java.nio.*;
import java.nio.channels.*;
import java.nio.charset.*;
import java.util.*;
import java.util.regex.*;

/**
 * Prints how often each word occurs in a file.
 *
 * The file is memory-mapped, decoded as ISO-8859-1, scanned line by line, and
 * each line is split into words on whitespace and punctuation. The result is
 * printed as a map sorted by word (natural String order, so upper-case words
 * sort before lower-case ones).
 *
 * The code intentionally sticks to J2SE 1.4 language features (no generics,
 * no autoboxing, no try-with-resources) to match the release it illustrates.
 */
public class WordCount {

    /** Matches the remainder of the current line; MULTILINE makes '$' match at every line end. */
    private static final Pattern LINE_PATTERN =
            Pattern.compile(".*$", Pattern.MULTILINE);

    /** Matches a single word separator: any whitespace or punctuation character. */
    private static final Pattern WORD_BREAK_PATTERN =
            Pattern.compile("[\\p{Space}\\p{Punct}]");

    /** Shared count for a word seen for the first time. */
    private static final Integer ONE = new Integer(1);

    /**
     * Counts the words in the file named by the first argument and prints the
     * resulting word-to-frequency map to standard output.
     *
     * @param args command-line arguments; args[0] is the path of the file to read
     * @throws Exception if the file cannot be opened, mapped, or decoded
     */
    public static void main(String args[]) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: java WordCount <filename>");
            return;
        }
        String filename = args[0];

        // Map file from filename to byte buffer
        FileInputStream input = new FileInputStream(filename);
        try {
            FileChannel channel = input.getChannel();
            // Pass the size as a long: map() rejects regions larger than
            // Integer.MAX_VALUE instead of silently truncating them.
            MappedByteBuffer buffer =
                    channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());

            // Convert to character buffer
            Charset charset = Charset.forName("ISO-8859-1");
            CharsetDecoder decoder = charset.newDecoder();
            CharBuffer charBuffer = decoder.decode(buffer);

            System.out.println(countWords(charBuffer));
        } finally {
            // Closing the stream also closes its channel.
            input.close();
        }
    }

    /**
     * Builds a sorted map from each word in the text to its number of occurrences.
     *
     * @param text the characters to scan
     * @return a TreeMap with String keys and Integer values
     */
    private static Map countWords(CharSequence text) {
        Map map = new TreeMap();
        Matcher lineMatcher = LINE_PATTERN.matcher(text);

        // For each line
        while (lineMatcher.find()) {
            CharSequence line = lineMatcher.group();

            // Get array of words on line
            String words[] = WORD_BREAK_PATTERN.split(line);

            // For each word
            for (int i = 0, n = words.length; i < n; i++) {
                // Adjacent separators produce empty strings; skip them.
                if (words[i].length() > 0) {
                    Integer frequency = (Integer) map.get(words[i]);
                    if (frequency == null) {
                        frequency = ONE;
                    } else {
                        frequency = new Integer(frequency.intValue() + 1);
                    }
                    map.put(words[i], frequency);
                }
            }
        }
        return map;
    }
}
|