Programmatically Stream (Upload) Large Files to Amazon S3

The upload() method in the AWS JavaScript SDK does a good job of uploading objects to S3, even when they’re large enough to warrant a multipart upload. It’s also possible to pipe a data stream to it in order to upload very large objects. To do this, wrap upload() in a small helper that hands it a Node.js stream.PassThrough() stream as the Body:

const AWS = require('aws-sdk');
const fs = require('fs');
const stream = require('stream');

const S3 = new AWS.S3();

function upload(S3) {
  let pass = new stream.PassThrough();

  let params = {
    Bucket: BUCKET, // destination bucket name
    Key: KEY,       // destination object key
    Body: pass      // S3 reads the object data from this stream
  };

  S3.upload(params, function (error, data) {
    if (error) {
      console.error(error);
    } else {
      console.info(data);
    }
  });

  return pass;
}

const readStream = fs.createReadStream('/path/to/your/file');
readStream.pipe(upload(S3));

A PassThrough stream simply emits whatever bytes are written to it. The helper above returns it as a writable stream, and upload() reads from it as you write to it.
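
If you’d rather await completion than rely on the callback’s console output, one option is to return the ManagedUpload promise alongside the PassThrough stream. This is only a sketch: the uploadStream helper and the 'my-bucket'/'my-key' values are made up for illustration.

const AWS = require('aws-sdk');
const fs = require('fs');
const stream = require('stream');

const S3 = new AWS.S3();

// Returns both the writable stream to pipe into and a promise that
// resolves (or rejects) when the upload finishes.
function uploadStream({ Bucket, Key }) {
  const pass = new stream.PassThrough();
  return {
    writeStream: pass,
    done: S3.upload({ Bucket, Key, Body: pass }).promise()
  };
}

const { writeStream, done } = uploadStream({ Bucket: 'my-bucket', Key: 'my-key' });
fs.createReadStream('/path/to/your/file').pipe(writeStream);

done
  .then((data) => console.info('upload finished:', data.Location))
  .catch((error) => console.error('upload failed:', error));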

You can also monitor the upload’s progress through the ManagedUpload object that upload() returns:

const manager = S3.upload(params);

manager.on('httpUploadProgress', (progress) => {
  console.log('progress', progress);
  // { loaded: 6472, total: 345486, part: 3, key: 'large-file.dat' }
});

// With no callback passed to upload(), call send() to start the upload.
manager.send((error, data) => error ? console.error(error) : console.info(data));
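
For very large objects it can also be worth tuning how the multipart upload behaves. The second argument to upload() accepts ManagedUpload options such as partSize and queueSize, and the returned manager exposes abort() if you need to cancel. The values below are illustrative only, a sketch rather than recommended settings:

const manager = S3.upload(params, {
  partSize: 10 * 1024 * 1024, // upload in 10 MB parts (S3's minimum part size is 5 MB)
  queueSize: 4                // keep up to 4 parts in flight at once
});

manager.on('httpUploadProgress', (progress) => {
  console.log('progress', progress);
});

// manager.abort(); // cancels the upload mid-flight if needed

manager.promise()
  .then((data) => console.info('done', data))
  .catch((error) => console.error('failed', error));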

If you’re using Knex.js and streaming data from a table, the same approach works:

const knexStream = knex.select('*').from(TABLE).stream();
const pass = new stream.PassThrough();

knexStream.on('data', (part) =>
  pass.write(JSON.stringify(part) + '\n'));
knexStream.on('end', () => pass.end());

// This must run inside an async function, since it uses await.
await S3.upload({
  Bucket: BUCKET,
  Key: KEY,
  Body: pass
}).promise();
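
The snippet above writes each row without checking the return value of pass.write(), so a fast query feeding a slow upload can buffer a lot of data in memory. A rough alternative, sketched below with the same BUCKET/KEY/TABLE placeholders, is to pipe the Knex stream through a Transform that serialises each row and let stream.pipeline() handle backpressure and error propagation:

const { Transform, PassThrough, pipeline } = require('stream');

// Serialise each row object into a line of JSON (NDJSON).
const toNdjson = new Transform({
  objectMode: true, // Knex emits row objects
  transform(row, _encoding, callback) {
    callback(null, JSON.stringify(row) + '\n');
  }
});

const body = new PassThrough();

pipeline(knex.select('*').from(TABLE).stream(), toNdjson, body, (error) => {
  if (error) console.error('streaming rows from the table failed:', error);
});

// Still inside an async function, as with the original snippet.
await S3.upload({ Bucket: BUCKET, Key: KEY, Body: body }).promise();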

References