So I got my Discord data export today. There's a massive JSON stream file (mine was 1.7 GB) containing 78 gender prediction events from the last year and a half or so.
I wrote a script to parse the JSON file into gender.csv:
discord-gender.mjs
import { open, writeFile } from "node:fs/promises";
/**
 * Three-way comparison built on the relational operators, suitable as the
 * core of an `Array.prototype.sort` callback.
 *
 * @param {*} a - Left operand.
 * @param {*} b - Right operand.
 * @returns {number} -1 when `a < b`, 1 when `a > b`, and 0 otherwise
 *   (including when the operands are not orderable, e.g. NaN).
 */
function compare(a, b) {
  // prettier-ignore
  return a < b ? -1
    : a > b ? 1
    : 0;
}
// The path of the analytics log is the first user-supplied CLI argument
// (argv[0] is the node binary, argv[1] is this script).
const [, , filename] = process.argv;
if (!filename) {
  // No input file given: print usage instructions and exit with a failure code.
  const usage = `
- Open the folder activity/analytics in your Discord archive.
- You should see at least one JSON file in there.
- Run this script with the JSON filename as its argument.
node discord-gender.mjs "YOUR_LOG_FILE.json"
`;
  console.error(usage);
  process.exit(1);
}
// Each row is [isoDate, prob_male, prob_female, prob_non_binary_gender_expansive].
const rows = [];
const file = await open(filename);
try {
  // This file can be quite large, so make sure to use an efficient line-by-line
  // reader rather than gobbling the whole thing into memory. My file was 1.7 GB.
  for await (const json of file.readLines()) {
    // There are other objects in this stream not about predicting gender. Let's
    // do a quick check without parsing the JSON so we can skip thru the other
    // events faster.
    if (!json.includes("predicted_gender")) {
      continue;
    }
    const obj = JSON.parse(json);
    // Technically Discord's date strings may not parse correctly since
    // they're not actually ISO 8601 lol, but Node.js seems to parse them
    // fine! Anyways, serialize as ISO 8601 since that's better.
    const date = new Date(obj.day_pt);
    // The substring check above is only a heuristic, so a line can match
    // without carrying a usable `day_pt`. `toISOString()` throws a RangeError
    // on an invalid Date, which would abort the whole export; skip such lines
    // and say so instead.
    if (Number.isNaN(date.getTime())) {
      console.warn(
        `Skipping event with unparseable day_pt: ${JSON.stringify(obj.day_pt)}`,
      );
      continue;
    }
    rows.push([
      date.toISOString(),
      obj.prob_male,
      obj.prob_female,
      obj.prob_non_binary_gender_expansive,
    ]);
  }
} finally {
  // Release the file handle even when parsing a line throws.
  await file.close();
}
// Sort by dates since the events aren't in chronological order for some reason.
// ISO 8601 strings from toISOString() order chronologically when compared as
// plain strings.
rows.sort(([dateA], [dateB]) => {
  return compare(dateA, dateB);
});
// Add CSV headers
rows.unshift(["date", "male", "female", "other"]);
// Yeah, I should use a real CSV serializer. No, I'm not going to. Node.js
// doesn't include one, and this data doesn't have "special characters" in it.
const csv = rows.map((row) => row.join(",")).join("\n");
await writeFile("gender.csv", csv, { encoding: "utf-8" });

































