JS - File Streams
· 3 min read
In JS, typical array operations load the entire array into memory, which is impractical when you are working with large files. For those cases we use generators, iterators, and streams to process the data in chunks. In this post we will:
- Read a large file in chunks
- Transform the data using generators and yield
- Write the transformed data to a new file
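Before diving in, here is a minimal sketch (not part of the file pipeline) of why generators help: map() builds its whole result in memory at once, while a generator produces one value at a time, only when the consumer asks for it.

// Eager: map() materializes the entire result array up front
const doubledAll = [1, 2, 3, 4].map(n => n * 2);

// Lazy: a generator yields values one by one, on demand
function* doubleLazily(values) {
  for (const n of values) {
    yield n * 2; // nothing is computed until the consumer requests it
  }
}

for (const n of doubleLazily([1, 2, 3, 4])) {
  console.log(n); // values arrive one at a time
}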
1. Reading a Large File
Use Node.js streams to read the file in chunks:
const fs = require('fs');

function createFileReader(filePath, chunkSize = 1024) {
  const stream = fs.createReadStream(filePath, {
    encoding: 'utf8',
    highWaterMark: chunkSize,
  });
  return stream;
}
fs.createReadStream reads the file in chunks whose size is defined by highWaterMark.
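As a quick check, a reader created this way can be consumed chunk by chunk with a for await...of loop, since Node.js readable streams are async iterable. This is a minimal sketch; 'input.txt' is a hypothetical sample file.

const reader = createFileReader('input.txt', 1024); // 'input.txt' is a hypothetical example file

(async () => {
  for await (const chunk of reader) {
    // Each chunk is a UTF-8 string holding roughly chunkSize bytes of file data
    console.log('chunk length:', chunk.length);
  }
})();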
2. Transforming Data Using Generators and Iterators
We use a generator function to process the file’s content line-by-line, allowing on-the-fly transformation without storing the entire file in memory.
async function* transformData(stream, transformer) {
  let leftover = '';
  // for await...of waits for each chunk from the stream before processing it
  for await (const chunk of stream) {
    const lines = (leftover + chunk).split('\n');
    leftover = lines.pop(); // Keep the last incomplete line for the next chunk
    for (const line of lines) {
      yield transformer(line);
    }
  }
  if (leftover) {
    yield transformer(leftover); // Process any remaining data
  }
}
- Reads the file in chunks and splits them into lines.
- Applies a transformer function (similar to map() for arrays) to modify each line.
- Uses yield to produce each transformed line lazily.
For example, a transformer that upper-cases each line:
function toUpperCase(line) {
return line.toUpperCase();
}
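A minimal sketch of how these two pieces compose, assuming the hypothetical 'input.txt' from earlier:

const stream = createFileReader('input.txt'); // hypothetical sample file

(async () => {
  for await (const upperLine of transformData(stream, toUpperCase)) {
    console.log(upperLine); // lines are transformed lazily, one at a time
  }
})();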
3. Writing Transformed Data to a New File
To write the transformed data efficiently to a new file, we use write streams:
const fs = require('fs');

async function writeToLargeFile(outputPath, transformedStream) {
  // Create a writable stream to write to the file
  const writeStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });

  for await (const transformedLine of transformedStream) {
    // Write the transformed line to the file
    // If the internal buffer is full, write() returns false
    if (!writeStream.write(transformedLine + '\n')) {
      // Wait for the 'drain' event, which signals that the buffer has been flushed
      await new Promise(resolve => writeStream.once('drain', resolve));
    }
  }

  // Signal that no more data will be written to the stream
  writeStream.end();

  // Wait for the 'finish' event to ensure all data is flushed before resolving
  await new Promise(resolve => writeStream.on('finish', resolve));
}
- Uses fs.createWriteStream to write transformed lines incrementally.
- Appends each line without loading the entire transformed content into memory.
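For comparison, here is a minimal alternative sketch, not the approach used in the rest of this post: Node's built-in stream.pipeline (promises API, available in recent Node.js versions) together with Readable.from can take care of the backpressure and 'finish' bookkeeping shown above automatically.

const { pipeline } = require('stream/promises');
const { Readable } = require('stream');
const fs = require('fs');

async function writeWithPipeline(outputPath, transformedStream) {
  // Re-append the newline that split('\n') stripped from each line
  async function* withNewlines(lines) {
    for await (const line of lines) {
      yield line + '\n';
    }
  }

  // pipeline() manages backpressure and resolves once the write stream has finished
  await pipeline(
    Readable.from(withNewlines(transformedStream)),
    fs.createWriteStream(outputPath, { encoding: 'utf8' })
  );
}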
4. Putting It All Together
async function processLargeFile(inputPath, outputPath, transformer) {
  const stream = createFileReader(inputPath);
  const transformedStream = transformData(stream, transformer);
  await writeToLargeFile(outputPath, transformedStream);
}

processLargeFile('input.txt', 'output.txt', toUpperCase)
  .then(() => console.log('Processing completed'))
  .catch(err => console.error('Error:', err));