Re: Fastest! Counting words (Mirek Fidler.. continues)
On Mon, 07 Apr 2008 08:31:22 -0500, Razii
<DONTwhatevere3e@hotmail.com> wrote:
40 MB file
pmk time 813ms (java client)
pmk time 828ms (java server)
pmk time 593ms (Jet)
Time: 828 ms (UPP)
Memory usage: Java version 57 MB
Memory usage: UPP version 42 MB
80 MB file
pmk time 1562ms (java client)
pmk time 1547ms (java -server)
pmk time 1172ms (Jet)
Changed the name of the class to Jwc. Also changed "worker" to a
static field, declared as: final static Jwc worker;
The new times... (best of five)
40 MB file
Time: 812 ms (U++)
Time 750 ms (java client)
Time 750 ms (java server)
Time 562 ms (Java Jet)
3 MB file
Time: 78 ms (U++)
Time 78 ms (java client)
Time 109 ms (java server)
Time 47 ms (Java Jet)
80 MB file...
Time: 1640 ms (U++)
Time 1500 ms (java client)
Time 1469 ms (java server)
Time 1125 ms (Java Jet)
This is officially King of the Hill...
------------------------------------------
http://www.pastebin.ca/976736
//by pm_kirkham
import java.io.*;
/**
 * Counts lines, words and bytes in one or more files and records how often
 * each distinct word occurs.
 *
 * <p>A "word" is a maximal run of ASCII letters; every other byte is a
 * separator. Each byte is masked with {@code 0x7f} before classification, so
 * the high bit is ignored. Words are case-sensitive.
 *
 * <p>Words are stored in a trie packed into a single {@code int[]}. Every
 * node occupies {@value #NODE_SIZE} consecutive slots:
 * <ul>
 *   <li>slots 0..25 hold the child offsets for {@code 'a'..'z'},</li>
 *   <li>slots 32..57 hold the child offsets for {@code 'A'..'Z'},</li>
 *   <li>slot {@value #COUNT_SLOT} holds the number of times the word that
 *       ends at this node was seen.</li>
 * </ul>
 * The root node lives at offset 0, so a child offset of 0 means "no child".
 */
public final class Jwc {
    /** Number of {@code int} slots occupied by one trie node. */
    private static final int NODE_SIZE = 64;

    /** Slot inside a node that stores the occurrence count of the word ending there. */
    private static final int COUNT_SLOT = 26;

    /** Size of the byte buffer used when reading a file. */
    private static final int READ_BUFFER_SIZE = 4096;

    /** Largest array length we attempt to allocate for the dictionary. */
    private static final int MAX_DICTIONARY_LENGTH = Integer.MAX_VALUE - 8;

    /**
     * Processes every file named on the command line, then prints the totals,
     * the per-word counts (only when at least one argument was given) and the
     * elapsed processing time in milliseconds.
     *
     * @param args paths of the files to process
     * @throws Exception if reading one of the files fails
     */
    public static void main(final String[] args) throws Exception {
        final long starttime = System.currentTimeMillis();
        for (String arg : args) {
            worker.processFile(arg);
        }
        // The timer deliberately stops before the results are printed.
        final long stoptime = System.currentTimeMillis();
        worker.printResults(args.length > 0);
        System.out.println("Time " + (stoptime - starttime) + " ms");
    } //end of main

    /** Shared instance used by {@link #main(String[])}. */
    final static Jwc worker = new Jwc ();

    int totalWords = 0;
    int totalLines = 0;
    int totalBytes = 0;

    /** Number of trie nodes allocated so far, not counting the root. */
    int dictionaryCount = 0;

    // Packed trie storage. The initial size is generous so that typical inputs
    // never need to reallocate; processChunk grows it when it fills up.
    int[] dictionaryData = new int[4096 * 3072];
    int dictionaries = 0;

    /**
     * Reads one file and adds its lines, words and bytes to the running
     * totals. Paths that do not name a regular file are silently ignored.
     *
     * @param arg path of the file to process
     * @throws IOException if the file cannot be opened or read
     */
    void processFile (String arg) throws IOException {
        File file = new File(arg);
        if (!file.isFile()) return;

        final byte[] buf = new byte[READ_BUFFER_SIZE];
        // offset of the trie node reached by the word currently being read
        int dindex = 0;
        int bytesRead = 0;

        FileInputStream in = new FileInputStream(file);
        try {
            // read() may return fewer bytes than requested, so loop until it
            // reports end-of-stream instead of assuming full buffers.
            int len;
            while ((len = in.read(buf, 0, buf.length)) != -1) {
                dindex = processChunk(buf, len, dindex);
                bytesRead += len;
            }
        } finally {
            in.close();
        }

        // A word that runs up to end-of-file has no trailing separator to
        // terminate it; count it here so it is not lost.
        if (dindex != 0) {
            totalWords++;
            dictionaryData[dindex + COUNT_SLOT]++;
        }
        totalBytes += bytesRead;
    }

    /**
     * Prints the line/word/byte totals and the number of trie nodes.
     *
     * @param dump when {@code true}, also prints every distinct word with its
     *             occurrence count
     */
    void printResults (boolean dump) {
        System.out.println("Lines\tWords\tBytes");
        System.out.println("---------------------------------------");
        System.out.println(totalLines + "\t" + totalWords + "\t" +
                totalBytes + "\tTotal");
        System.out.println("---------------------------------------");
        if (dump)
            dumpDictionary(0, new char[1024], 0);
        System.out.println("dictionaryCount: " + dictionaryCount);
    }

    /**
     * Scans {@code len} bytes of {@code buf}, updating the line and word
     * totals and the trie.
     *
     * @param buf    bytes to scan
     * @param len    number of valid bytes in {@code buf}; values &lt;= 0 scan nothing
     * @param dindex trie offset of the word in progress when the previous
     *               chunk ended, or 0 if no word is in progress
     * @return the trie offset of the word still in progress at the end of this
     *         chunk, or 0 if the chunk ended on a separator
     * @throws IllegalStateException if the dictionary cannot grow any further
     */
    int processChunk (byte[] buf, int len, int dindex) {
        int numLines = 0;
        int numWords = 0;
        // Work on locals inside the loop and write them back once at the end.
        int[] dictionaryData = this.dictionaryData;
        int dictionaryCount = this.dictionaryCount;
        for (int j = 0; j < len; ++j) {
            int c = buf[j] & 0x7f;
            if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z') {
                // (c - 'A') ^ 32 maps 'a'..'z' to 0..25 and 'A'..'Z' to 32..57.
                final int index = ((c - 'A')^32) + dindex;
                dindex = dictionaryData[index];
                if (dindex == 0) {
                    // First time this prefix is seen: allocate a new node,
                    // enlarging the backing array if it is full.
                    final long nodeStart = (long) (++dictionaryCount) * NODE_SIZE;
                    if (nodeStart + NODE_SIZE > dictionaryData.length) {
                        dictionaryData = this.dictionaryData =
                                grow(dictionaryData, nodeStart + NODE_SIZE);
                    }
                    dindex = dictionaryData[index] = (int) nodeStart;
                }
            } else {
                if (c == '\n')
                    numLines++;
                if (dindex != 0) {
                    // Separator after a letter run: the word is complete.
                    numWords++;
                    dictionaryData[dindex + COUNT_SLOT]++;
                    dindex = 0;
                }
            }
        }
        totalLines += numLines;
        totalWords += numWords;
        this.dictionaryCount = dictionaryCount;
        return dindex;
    }

    /** Returns a copy of {@code data} at least {@code minLength} long, doubling when possible. */
    private static int[] grow (int[] data, long minLength) {
        if (minLength > MAX_DICTIONARY_LENGTH) {
            throw new IllegalStateException(
                    "dictionary too large: needs " + minLength + " slots");
        }
        final long doubled = 2L * data.length;
        final int newLength =
                (int) Math.min(Math.max(doubled, minLength), (long) MAX_DICTIONARY_LENGTH);
        final int[] bigger = new int[newLength];
        System.arraycopy(data, 0, bigger, 0, data.length);
        return bigger;
    }

    /**
     * Recursively prints every word stored below the given trie node, one per
     * line, as {@code count<TAB>word}. Recursion depth equals the length of
     * the longest stored word.
     *
     * @param dindex offset of the node to print from
     * @param buf    scratch buffer whose first {@code buflen} chars spell the
     *               prefix leading to this node
     * @param buflen length of that prefix
     */
    void dumpDictionary (int dindex, char[] buf, int buflen) {
        final int[] data = dictionaryData;
        final int count = data[dindex + COUNT_SLOT];
        if (count != 0)
            System.out.println(count + "\t" + new
                    String(buf, 0, buflen));
        char[] prefix = buf;
        for (int i = 0; i < NODE_SIZE; ++i) {
            final int child = data[dindex + i];
            if ((child != 0) && (i != COUNT_SLOT)) {
                if (buflen == prefix.length) {
                    // The word is longer than the scratch buffer: continue
                    // with a larger copy that keeps the prefix.
                    final char[] bigger = new char[Math.max(16, prefix.length * 2)];
                    System.arraycopy(prefix, 0, bigger, 0, buflen);
                    prefix = bigger;
                }
                // Inverse of the slot mapping used in processChunk.
                prefix[buflen] = (char)('A' + (i^32));
                dumpDictionary(child, prefix, buflen + 1);
            }
        }
    }
}