Files
auto-archiver-setup-tool/functions/index.js

98 lines
3.4 KiB
JavaScript

/**
* Import function triggers from their respective submodules:
*
* const {onCall} = require("firebase-functions/v2/https");
* const {onDocumentWritten} = require("firebase-functions/v2/firestore");
*
* See a full list of supported triggers at https://firebase.google.com/docs/functions
*/
const { onSchedule } = require("firebase-functions/v2/scheduler");
const logger = require("firebase-functions/logger");
// The Firebase Admin SDK to access Firestore.
const { initializeApp } = require("firebase-admin/app");
const { getFirestore } = require("firebase-admin/firestore");
const { defineSecret } = require('firebase-functions/params');
// Secret parameters. They are listed in the `secrets` option of the scheduled
// function in this file and read there with `.value()`.
// API_TOKEN is sent as the Bearer token on the sheet_service POST request.
const API_TOKEN = defineSecret('API_SERVICE_PASSWORD');
// CLIENT_EMAIL and PRIVATE_KEY are passed to `google.auth.JWT` to authenticate
// against the Google Sheets API.
const CLIENT_EMAIL = defineSecret('GOOGLE_API_CLIENT_EMAIL');
const PRIVATE_KEY = defineSecret('GOOGLE_API_PRIVATE_KEY');
const { google } = require('googleapis');
// Initialise the Firebase Admin app (called without explicit configuration)
// so that getFirestore() can be used later.
initializeApp();
/**
 * Pauses the calling async flow.
 *
 * @param {number} ms - Delay in milliseconds, forwarded to setTimeout.
 * @returns {Promise<undefined>} Promise that resolves (with no value) once the timer fires.
 */
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
/**
 * 32-bit string hash (the classic `h = 31 * h + charCode` recurrence),
 * see https://stackoverflow.com/a/7616484/6196010
 *
 * Per the original author's measurement over 1M random strings, the result is
 * well balanced modulo 60; bucket 0 shows up twice as often as the others,
 * which is acceptable for the scheduling use in this file.
 *
 * @returns {number} Signed 32-bit integer hash; 0 for the empty string.
 */
String.prototype.hashCode = function () {
  let acc = 0;
  for (let idx = 0; idx < this.length; idx += 1) {
    // Math.imul(31, acc) equals (acc << 5) - acc modulo 2^32; `| 0` wraps the
    // sum back into the signed 32-bit range after adding the UTF-16 code unit.
    acc = (Math.imul(31, acc) + this.charCodeAt(idx)) | 0;
  }
  return acc;
};
exports.processSheetScheduler = onSchedule(
{ secrets: [API_TOKEN, CLIENT_EMAIL, PRIVATE_KEY], schedule: "* * * * *" },
async (event) => {
// authenticate the service account
const googleAuth = new google.auth.JWT(CLIENT_EMAIL.value(), null, PRIVATE_KEY.value().replace(/\\n/g, '\n'), 'https://www.googleapis.com/auth/spreadsheets');
const sheets = await google.sheets({ version: 'v4', auth: googleAuth });
// get all documents from firestore sheets collection
const db = getFirestore();
// each sheet runs once per hour, so we hash the sheet id and only process it if the hash % 60 matches the cron minute
const querySnapshot = await db.collection("sheets").get();
const eventDate = new Date(Date.parse(event.scheduleTime));
querySnapshot.forEach(async (doc) => {
const hashToSixty = Math.abs(doc.id.hashCode() % 60);
if (hashToSixty != eventDate.getMinutes()) {
return;
}
logger.log(`processing document ${doc.id}, its hash % 60 (${hashToSixty}) matches the cron minute (${eventDate.getMinutes()})`);
try {
await sheets.spreadsheets.get({ spreadsheetId: doc.data().sheetId });
} catch (e) {
if (e.status == 404) {
await doc.ref.delete();
logger.log(`document ${doc.data().sheetId} not found, deleted`);
return;
}
}
// send POST request with sheetID to trigger sheet processing
const url = "https://auto-archiver-api.bellingcat.com/sheet_service";
const data = {
sheet_id: doc.data().sheetId,
author_id: doc.data().email ?? doc.data().uid,
tags: ["setup-tool"]
};
const options = {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${API_TOKEN.value()}`,
},
body: JSON.stringify(data),
};
const response = await fetch(url, options);
console.log(response);
await doc.ref.update({ lastArchived: Date.now() });
await sleep(100);
});
}
);