// Module: FileLineReader // Constructor: FileLineReader(filename, bufferSize = 8192) // Methods: hasNextLine() -> boolean // nextLine() -> String // // var fs = require("fs"); var sys = require("sys"); exports.FileLineReader = function(filename, bufferSize) { if(!bufferSize) { bufferSize = 8192; } //private: var currentPositionInFile = 0; var buffer = ""; var fd = fs.openSync(filename, "r"); // return -1 // when EOF reached // fills buffer with next 8192 or less bytes var fillBuffer = function(position) { var res = fs.readSync(fd, bufferSize, position, "ascii"); buffer += res[0]; if (res[1] == 0) { return -1; } return position + res[1]; }; currentPositionInFile = fillBuffer(0); //public: this.hasNextLine = function() { while (buffer.indexOf("\n") == -1) { currentPositionInFile = fillBuffer(currentPositionInFile); if (currentPositionInFile == -1) { return false; } } if (buffer.indexOf("\n") > -1) { return true; } return false; }; //public: this.nextLine = function() { var lineEnd = buffer.indexOf("\n"); var result = buffer.substring(0, lineEnd); buffer = buffer.substring(result.length + 1, buffer.length); return result; }; return this; };
And some TestCode:
// Smoke test for FileLineReader: reads top-5.txt with a 10-byte buffer so
// that lines span several reads, and checks each line in order.
var assert = require("assert");
var flr = require("./FileLineReader");

try {
  var reader = new flr.FileLineReader("top-5.txt", 10);

  // Progress messages go to stderr; console.error replaces sys.debug, which
  // is no longer available in current Node releases.
  console.error(__filename + " First line...");
  assert.strictEqual(reader.hasNextLine(), true);
  assert.strictEqual(reader.nextLine(), "yahoo.com");

  console.error(__filename + " Next line...");
  assert.strictEqual(reader.hasNextLine(), true);
  assert.strictEqual(reader.nextLine(), "youtube.com");

  console.error(__filename + " Next line...");
  assert.strictEqual(reader.hasNextLine(), true);
  assert.strictEqual(reader.nextLine(), "facebook.com");

  console.error(__filename + " Next line...");
  assert.strictEqual(reader.hasNextLine(), true);
  assert.strictEqual(reader.nextLine(), "live.com");

  assert.strictEqual(reader.hasNextLine(), true);
  assert.strictEqual(reader.nextLine(), "msn.com");

  assert.strictEqual(reader.hasNextLine(), false);
} catch (err) {
  console.error(__filename + ": ERROR: " + err);
  // Report the failure to the calling process instead of exiting with 0.
  process.exitCode = 1;
}
The testdata file top-5.txt looked like this:
yahoo.com
youtube.com
facebook.com
live.com
msn.com
This code works perfectly for what I needed. I am using it within some tools that I would like to release to GitHub eventually. Would you mind releasing the code under MIT license, or another?
- Josh Du Lac
joshdulac.com
I like to close my files early to keep the number of open file descriptors to a minimum so I added the following 'public' method:
// Public: closes the file descriptor `fd` so it does not stay open longer
// than needed. `cb` is optional; when it is omitted a default callback
// rethrows any close error, because some Node versions reject a missing
// callback.
this.close = function(cb) {
  fs.close(fd, typeof cb === "function" ? cb : function(err) {
    if (err) {
      throw err;
    }
  });
};
what about CRLF? i mean support files with windows EOL
Thanks for this code. It is very helpful for me and my research.
Works well for loading in enormous files - added an encoding parameter to handle other file types (obviously, no big deal).
Hey Dirk,
Couldn't find your email but I made this into an NPM package (https://github.com/davidbenhaim/FileLineReader). You're listed as one of the authors.
- David
your readSync usage appears to be currently deprecated. i changed things to use buffers & made it a class 'cuz i'm a .js noob and don't really know how to deal with exports:
const fs = require('fs')
/**
 * Synchronous line-by-line file reader built on fs.readSync with a reusable
 * Buffer. Lines are split on "\n" and returned without the newline.
 */
class LineReader {
  /**
   * Opens `filename` for reading and loads the first chunk.
   * @param {string} filename - path of the file to read
   * @param {number} [bufSize=8192] - size in bytes of the read buffer
   */
  constructor (filename, bufSize) {
    this.bufferSize = bufSize ? bufSize : 8192;
    this.fd = fs.openSync(filename, "r");
    this.buffer = Buffer.alloc(this.bufferSize);
    this.filePos = 0;     // file offset of the next read
    this.bufferPos = 0;   // index of the first unconsumed byte in buffer
    this.bufferBytes = 0; // number of valid bytes currently in buffer
    this.fillBuffer();
  }

  /**
   * Overwrites the buffer with the next `bufferSize` (or fewer) bytes.
   * @returns {number} bytes read; 0 at end of file
   */
  fillBuffer () {
    const bytesRead = fs.readSync(this.fd, this.buffer, 0, this.bufferSize, this.filePos);
    this.bufferBytes = bytesRead;
    this.filePos += bytesRead;
    this.bufferPos = 0;
    return bytesRead;
  }

  //public:
  /**
   * Returns the next line, or null once the file is exhausted. A trailing
   * newline at the end of the file does not produce an extra empty line.
   * @returns {string|null}
   */
  nextLine() {
    if (this.bufferBytes <= 0) {
      // last fillBuffer() was @ or past end of file
      return null;
    }
    const NEWLINE = 0x0a;
    // Raw byte pieces of the current line; decoded once at the end so a
    // multi-byte UTF-8 character split across two reads is decoded correctly.
    const pieces = [];
    let lineEnd = -1;
    // Search only the valid part of the buffer: after a short read the tail
    // still holds stale bytes from the previous chunk.
    while ((lineEnd = this.buffer.subarray(0, this.bufferBytes).indexOf(NEWLINE, this.bufferPos)) < 0) {
      if (this.bufferPos < this.bufferBytes) {
        // Copy, because fillBuffer() overwrites this.buffer in place.
        pieces.push(Buffer.from(this.buffer.subarray(this.bufferPos, this.bufferBytes)));
      }
      if (this.fillBuffer() <= 0) {
        // End of file: return what was collected, or null if nothing was.
        return pieces.length > 0 ? Buffer.concat(pieces).toString("utf8") : null;
      }
    }
    pieces.push(this.buffer.subarray(this.bufferPos, lineEnd));
    this.bufferPos = lineEnd + 1;
    return Buffer.concat(pieces).toString("utf8");
  }

  /**
   * Closes the file descriptor.
   * @param {function(Error=)} [cb] - optional completion callback; when
   *   omitted, a close error is rethrown from a default callback.
   */
  close(cb) {
    fs.close(this.fd, typeof cb === "function" ? cb : (err) => {
      if (err) {
        throw err;
      }
    });
  }
}
// make a 3 line test file with no trailing newline:
const path = "bozo.txt";
const data = "one line\nanother line\nand another";
fs.writeFileSync(path, data);

// test using small buffer size to make sure we cross boundaries properly:
// (declared with const so `reader` is not an implicit global)
const reader = new LineReader(path, 4);
try {
  let line = "";
  while ((line = reader.nextLine()) != null) {
    console.log(line);
  }
} finally {
  // Always pass a callback: some Node versions reject fs.close without one.
  reader.close(function (err) {
    if (err) {
      throw err;
    }
  });
}
console.log("Done !");
I got an error that the close(cb) method required a callback, so I changed it to the following code and everything worked:
ReplyDeleteclose() {
fs.close(this.fd, function(err) {
if (err) throw err;
console.log("file closed.");
});
}