Question

I'm attempting to extract a single line of a file, given that I know the pathname and the line number.

Solution - without optimisation

With readable stream

var fs = require('fs');
 
function get_line(filename, line_no, callback) {
    // Stream the file and stop reading as soon as the requested
    // (zero-based) line is complete, instead of buffering the whole file.
    // callback(err, line) is invoked exactly once.
    var stream = fs.createReadStream(filename, {
      flags: 'r',
      encoding: 'utf-8',
      // 'bufferSize' was never a real option; 'highWaterMark' controls
      // the chunk size. (The legacy octal 'mode: 0666' was also a
      // strict-mode syntax error and is not needed for reading.)
      highWaterMark: 64 * 1024
    });

    var fileData = '';
    var answered = false; // guard: never invoke the callback twice

    stream.on('data', function (data) {
      fileData += data;
      var lines = fileData.split('\n');

      // Line `line_no` is only guaranteed complete once a LATER newline
      // has been seen, i.e. there are at least line_no + 2 pieces.
      // (The old check `lines.length >= line_no` could hand back a
      // partial line, or `undefined`.)
      if (lines.length > +line_no + 1) {
        answered = true;
        stream.destroy();
        callback(null, lines[+line_no]);
      }
    });

    stream.on('error', function (err) {
      if (!answered) {
        answered = true;
        callback(err, null);
      }
    });

    stream.on('end', function () {
      if (answered) return;
      answered = true;
      // The wanted line may be the final, newline-less line of the file.
      var lines = fileData.split('\n');
      if (+line_no < lines.length) {
        callback(null, lines[+line_no]);
      } else {
        callback('File end reached without finding line', null);
      }
    });
}
 
// Don't ignore the error argument: on failure the old code printed
// "The line: null" instead of the reason.
get_line('./file.txt', 1, function (err, line) {
  if (err) {
    return console.error(err);
  }
  console.log('The line: ' + line);
});

Direct solution

You should use the split method and index the resulting array instead of looping over the file manually:

var fs = require('fs');
 
function get_line(filename, line_no, callback) {
    // Read the whole file synchronously and pick the requested
    // (zero-based) line. Simple, but loads the entire file into memory.
    // Throws if the file cannot be read or the line does not exist.
    var data = fs.readFileSync(filename, 'utf8');
    var lines = data.split('\n');

    // Off-by-one fix: valid indices are 0 .. lines.length - 1, so the old
    // check `line_no > lines.length` let `lines[lines.length]` (undefined)
    // slip through when line_no === lines.length.
    if (+line_no >= lines.length) {
      throw new Error('File end reached without finding line');
    }

    callback(null, lines[+line_no]);
}
 
// Don't ignore the error argument: on failure the old code printed
// "The line: null" instead of the reason.
get_line('./file.txt', 9, function (err, line) {
  if (err) {
    return console.error(err);
  }
  console.log('The line: ' + line);
});

for (var l in lines) isn't the most efficient way of looping over an array (and for...in should not be used on arrays at all); do this instead:

for(var i = 0, iMax = lines.length; i < iMax; i++){/* lines[i] */ }

The asynchronous way

var fs = require('fs');
 
function get_line(filename, line_no, callback) {
    // Asynchronously read the whole file, then pick the requested
    // (zero-based) line. callback(err, line) is invoked exactly once.
    fs.readFile(filename, function (err, data) {
      // Report I/O errors through the callback: a `throw` inside this
      // async handler cannot be caught by the caller and would crash
      // the process instead.
      if (err) return callback(err, null);

      // Data is a buffer that we need to convert to a string.
      var lines = data.toString('utf-8').split('\n');

      // Off-by-one fix: lines[lines.length] is undefined, so line_no
      // must be strictly less than lines.length.
      if (+line_no >= lines.length) {
        return callback('File end reached without finding line', null);
      }

      callback(null, lines[+line_no]);
    });
}
 
// Don't ignore the error argument: on failure the old code printed
// "The line: null" instead of the reason.
get_line('./file.txt', 9, function (err, line) {
  if (err) {
    return console.error(err);
  }
  console.log('The line: ' + line);
});

Solution - with optimisation

None of the above solutions actually reads any less of the file into memory — and that is the optimisation we usually need when extracting a line from a big file, since the I/O is still relatively slow.

Ideally, it is better to do this without reading any more of the file than is necessary.

Stream API

https://strongloop.com/strongblog/practical-examples-of-the-new-node-js-streams-api/

Node Binary Reader

https://github.com/gagle/node-binary-reader

Delete previous data

// Named `get_line` like the other variants — a bare anonymous
// `function(...)` at statement position is a syntax error.
function get_line(file, line_no, cb) {
    // Same streaming approach, but discard the contents of lines we have
    // already passed so memory use stays bounded on huge files.
    // cb(err, line) is invoked exactly once; line_no is zero-based.
    var stream = fs.createReadStream(file, {
        flags: 'r',
        encoding: 'utf-8',
        // 'bufferSize' was never a real option; 'highWaterMark' controls
        // the chunk size.
        highWaterMark: 64 * 1024
    });

    var fileData = '';
    var answered = false; // guard: never invoke the callback twice

    stream.on('data', function (data) {
        fileData += data;
        var lines = fileData.split('\n');

        // Line `line_no` is only complete once a LATER newline was seen.
        if (lines.length > +line_no + 1) {
            answered = true;
            stream.destroy();
            cb(null, lines[+line_no]);
        } else {
            // Remove unnecessary data, but keep (a) the line COUNT, as bare
            // newlines, and (b) the trailing PARTIAL line — the old code
            // dropped it, which could truncate the very line we want.
            fileData = new Array(lines.length).join('\n') +
                lines[lines.length - 1];
        }
    });

    stream.on('error', function (err) {
        if (!answered) {
            answered = true;
            cb(err, null);
        }
    });

    stream.on('end', function () {
        if (answered) return;
        answered = true;
        // The wanted line may be the final, newline-less line of the file.
        var lines = fileData.split('\n');
        if (+line_no < lines.length) {
            cb(null, lines[+line_no]);
        } else {
            cb('File end reached without finding line', null);
        }
    });
}

Using a 70000 lines file, to display line n°50000 I got those results:

  • real 0m3.504s
  • user 0m0.000s
  • sys 0m0.015s

References & Resources

  • http://stackoverflow.com/questions/6394951/read-nth-line-of-file-in-nodejs
  • https://strongloop.com/strongblog/practical-examples-of-the-new-node-js-streams-api/
  • https://github.com/gagle/node-binary-reader