Monday, March 22, 2010

Reading files with node.js

I tried to find an example of using node.js to read a file line by line. I found none, so I had to write one myself.


// Module: FileLineReader
// Constructor: FileLineReader(filename, bufferSize = 8192)
// Methods: hasNextLine() -> boolean
//          nextLine() -> String
//
//
var fs = require("fs");
var sys = require("sys");

exports.FileLineReader = function(filename, bufferSize) {

    if(!bufferSize) {
        bufferSize = 8192;
    }

    //private:
    var currentPositionInFile = 0;
    var buffer = "";
    var fd = fs.openSync(filename, "r");


    // return -1
    // when EOF reached
    // fills buffer with next 8192 or less bytes
    var fillBuffer = function(position) {

        var res = fs.readSync(fd, bufferSize, position, "ascii");

        buffer += res[0];
        if (res[1] == 0) {
            return -1;
        }
        return position + res[1];

    };

    currentPositionInFile = fillBuffer(0);

    //public:
    this.hasNextLine = function() {
        while (buffer.indexOf("\n") == -1) {
            currentPositionInFile = fillBuffer(currentPositionInFile);
            if (currentPositionInFile == -1) {
                return false;
            }
        }

        if (buffer.indexOf("\n") > -1) {

            return true;
        }
        return false;
    };

    //public:
    this.nextLine = function() {
        var lineEnd = buffer.indexOf("\n");
        var result = buffer.substring(0, lineEnd);

        buffer = buffer.substring(result.length + 1, buffer.length);
        return result;
    };

    return this;
};




And some test code:

var assert = require("assert"),
        sys = require("sys"),
        flr = require("./FileLineReader");

// Reads top-5.txt through a deliberately tiny 10-byte buffer, so that lines
// span several reads, and checks every line in order.
var expectedLines = ["yahoo.com", "youtube.com", "facebook.com", "live.com", "msn.com"];

try {
    var reader = new flr.FileLineReader("top-5.txt", 10);

    for (var i = 0; i < expectedLines.length; i++) {
        // Progress goes to stderr through console.error; sys.debug is not
        // available in current node releases.
        console.error(__filename + (i === 0 ? " First line..." : " Next line..."));

        assert.strictEqual(reader.hasNextLine(), true);
        assert.strictEqual(reader.nextLine(), expectedLines[i]);
    }

    // Nothing may be left after the last expected line.
    assert.strictEqual(reader.hasNextLine(), false);

} catch (err) {
    console.error(__filename + ": ERROR: " + err);
    // Rethrown so that a failed assertion ends the process with a non-zero
    // exit status instead of only being logged.
    throw err;
}



The test data file top-5.txt looked like this:

yahoo.com
youtube.com
facebook.com
live.com
msn.com

6 comments:

  1. This code works perfectly for what I needed. I am using it within some tools that I would like to release to GitHub eventually. Would you mind releasing the code under MIT license, or another?

    - Josh Du Lac
    joshdulac.com

    ReplyDelete
  2. I like to close my files early to keep the number of open file descriptors to a minimum so I added the following 'public' method:

    this.close = function(cb) {
    fs.close(fd, cb);
    }

    ReplyDelete
  3. what about CRLF? i mean support files with windows EOL

    ReplyDelete
  4. Thanks for this code. It is very helpful for me and my research.

    ReplyDelete
  5. Works well for loading in enormous files - added an encoding parameter to handle other file types (obviously, no big deal).

    ReplyDelete
  6. Hey Dirk,

    Couldn't find your email but I made this into an NPM package (https://github.com/davidbenhaim/FileLineReader). You're listed as one of the authors.

    - David

    ReplyDelete