mirror of
https://github.com/bellingcat/datasheet-server.git
synced 2026-06-12 05:18:31 +03:00
Merge pull request #29 from forensic-architecture/topic/add-deeprows
Topic/add deeprows
This commit is contained in:
@@ -1,37 +0,0 @@
|
|||||||
import R from 'ramda'
|
|
||||||
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
|
|
||||||
|
|
||||||
/**
 * columns - generate a Blueprint from sheet data, one resource per column.
 * Each label in the first row names a resource, and every later row
 * contributes the value found at that column's position to the resource's
 * data list.
 *
 * @param {string} tabName - stored as the Blueprint's `name`.
 * @param {string} sheetName - stored as the Blueprint's `sheet.name`.
 * @param {string} sheetId - stored as the Blueprint's `sheet.id`.
 * @param {Array<Array>} data - list of lists representing sheet data;
 *   the first inner list holds the column labels.
 * @return {Object} Blueprint - a clone of defaultBlueprint with one
 *   resource (a clone of defaultResource) per column label.
 */
function columns (tabName, sheetName, sheetId, data) {
  // Define Blueprint props
  const bp = R.clone(defaultBlueprint)
  bp.sheet = {
    name: sheetName,
    id: sheetId
  }
  bp.name = tabName

  // An empty sheet has no header row, hence no resources to create.
  if (!data || data.length === 0) return bp

  // column names define resources
  const [labels, ...rows] = data
  labels.forEach(label => {
    bp.resources[label] = R.clone(defaultResource)
  })

  // remaining rows as data; colIdx is the position shared by the label
  // and its value within each row
  rows.forEach(row => {
    labels.forEach((label, colIdx) => {
      bp.resources[label].data.push(row[colIdx])
    })
  })
  return bp
}
|
|
||||||
|
|
||||||
export default columns
|
|
||||||
71
src/blueprinters/deeprows.js
Normal file
71
src/blueprinters/deeprows.js
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
import { fmtObj } from '../lib/util'
|
||||||
|
|
||||||
|
/**
 * Each resource item is an object with values labelled according
 * to column names specified in the sheet's first row. If two or more
 * column names are the same except for a different integer at the end
 * (e.g. 'tag1', and 'tag2'), then the values of those columns are
 * aggregated into a list stored under the pluralised prefix ('tags').
 * Any values in those columns that are empty ('', null or undefined)
 * will NOT be added to the list.
 *
 * NOTE(review): a plain column whose name equals a pluralised prefix
 * (e.g. 'tags' next to 'tag1') overwrites the aggregated list, because
 * flat labels are copied last.
 *
 * @param {Array<Array>} data list of lists representing sheet data;
 *   the first inner list holds the column labels.
 * @return {Array<Object>} the structured data, one object per row
 *   after the header. Empty when the sheet has no rows.
 */
const deeprows = (data) => {
  // An empty sheet has no header row to derive labels from.
  if (!data || data.length === 0) return []

  const [itemLabels, ...rows] = data
  const baseFmt = fmtObj(itemLabels)

  // The prefix is matched lazily so the WHOLE trailing run of digits is
  // treated as the index: 'tag10' groups under 'tag', not under 'tag1'.
  const endsWithNumber = /(.*?)[0-9]+$/

  // create a structure to indicate which columns need to be aggregated
  const flatLabels = []
  const groupedLabels = new Map()
  itemLabels.forEach(label => {
    // only string labels can carry a numeric suffix
    const matches = typeof label === 'string' ? label.match(endsWithNumber) : null
    if (!matches) {
      flatLabels.push(label)
      return
    }
    const groupKey = `${matches[1]}s`
    if (groupedLabels.has(groupKey)) {
      groupedLabels.get(groupKey).push(label)
    } else {
      groupedLabels.set(groupKey, [label])
    }
  })

  // generate the value for deep labels using the structure created
  return rows.map(row => {
    const baseRow = baseFmt(row)
    const deepRow = {}

    // aggregated keys first, keeping only non-empty cell values
    groupedLabels.forEach((oldLabels, groupKey) => {
      deepRow[groupKey] = oldLabels
        .map(l => baseRow[l])
        .filter(vl => vl !== '' && vl != null)
    })

    // move values for flat labels over from base
    flatLabels.forEach(label => {
      deepRow[label] = baseRow[label]
    })

    return deepRow
  })
}

export default deeprows
|
||||||
@@ -1,39 +1,16 @@
|
|||||||
import R from 'ramda'
|
|
||||||
import { fmtObj } from '../lib/util'
|
import { fmtObj } from '../lib/util'
|
||||||
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* groups - generate a Blueprint from a data sheet grouped by a column called 'group'
|
|
||||||
* The resource name defaults to 'groups', or a custom resource name can be passed.
|
|
||||||
* Each resource item is an object with values labelled according to column
|
* Each resource item is an object with values labelled according to column
|
||||||
* names. Items are inserted in the data list at idx = id.
|
* names. Items are inserted into the data list at idx = id.
|
||||||
*
|
*
|
||||||
* @param {type} data list of lists representing sheet data.
|
* @param {type} data list of lists representing sheet data.
|
||||||
* @param {type} label="groups" name of resource in blueprint.
|
* @return {type} Array the structured data.
|
||||||
* @param {type} name="" name of blueprint.
|
|
||||||
* @return {type} Blueprint
|
|
||||||
*/
|
*/
|
||||||
export default function groups (
|
export default (data) => {
|
||||||
tabName,
|
|
||||||
sheetName,
|
|
||||||
sheetId,
|
|
||||||
data,
|
|
||||||
label = 'groups'
|
|
||||||
) {
|
|
||||||
// Define Blueprint
|
|
||||||
const bp = R.clone(defaultBlueprint)
|
|
||||||
bp.sheet = {
|
|
||||||
name: sheetName,
|
|
||||||
id: sheetId
|
|
||||||
}
|
|
||||||
bp.name = tabName
|
|
||||||
|
|
||||||
// Column names define resources
|
|
||||||
const itemLabels = data[0]
|
const itemLabels = data[0]
|
||||||
const fmt = fmtObj(itemLabels)
|
const fmt = fmtObj(itemLabels)
|
||||||
bp.resources[label] = R.clone(defaultResource)
|
const output = []
|
||||||
bp.resources[label].data = []
|
|
||||||
|
|
||||||
const dataGroups = {}
|
const dataGroups = {}
|
||||||
|
|
||||||
data.forEach((row, idx) => {
|
data.forEach((row, idx) => {
|
||||||
@@ -45,12 +22,14 @@ export default function groups (
|
|||||||
dataGroups[group].push(fmt(row))
|
dataGroups[group].push(fmt(row))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
Object.keys(dataGroups).forEach(groupKey => {
|
Object.keys(dataGroups)
|
||||||
bp.resources[label].data.push({
|
.forEach(groupKey => {
|
||||||
group: groupKey,
|
output.push({
|
||||||
group_label: dataGroups[groupKey][0].group_label,
|
group: groupKey,
|
||||||
data: dataGroups[groupKey]
|
group_label: dataGroups[groupKey][0].group_label,
|
||||||
|
data: dataGroups[groupKey]
|
||||||
|
})
|
||||||
})
|
})
|
||||||
})
|
|
||||||
return bp
|
return output
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,43 +1,22 @@
|
|||||||
import R from 'ramda'
|
|
||||||
import { fmtObj } from '../lib/util'
|
import { fmtObj } from '../lib/util'
|
||||||
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ids - generate a Blueprint from a data sheet by id, which is an integer.
|
* Very similar to the rows blueprinter, but inserts each row as a value in
|
||||||
* The resource name defaults to 'ids', or a custom resource name can be passed.
|
* an object, where the value in the 'id' column of the row will be used as
|
||||||
* Each resource item is an object with values labelled according to column
|
* the search key
|
||||||
* names. Items are inserted in the data list at idx = id.
|
|
||||||
*
|
*
|
||||||
* @param {type} data list of lists representing sheet data.
|
* @param {type} data list of lists representing sheet data.
|
||||||
* @param {type} label="ids" name of resource in blueprint.
|
* @return {type} Object the structured data.
|
||||||
* @param {type} name="" name of blueprint.
|
|
||||||
* @return {type} Blueprint
|
|
||||||
*/
|
*/
|
||||||
export default function ids (
|
export default (data) => {
|
||||||
tabName,
|
|
||||||
sheetName,
|
|
||||||
sheetId,
|
|
||||||
data,
|
|
||||||
label = 'ids'
|
|
||||||
) {
|
|
||||||
// Define Blueprint
|
|
||||||
const bp = R.clone(defaultBlueprint)
|
|
||||||
bp.sheet = {
|
|
||||||
name: sheetName,
|
|
||||||
id: sheetId
|
|
||||||
}
|
|
||||||
bp.name = tabName
|
|
||||||
|
|
||||||
// Column names define resources
|
|
||||||
const itemLabels = data[0]
|
const itemLabels = data[0]
|
||||||
const fmt = fmtObj(itemLabels)
|
const fmt = fmtObj(itemLabels)
|
||||||
bp.resources[label] = R.clone(defaultResource)
|
const output = {}
|
||||||
bp.resources[label].data = {}
|
|
||||||
|
|
||||||
data.forEach((row, idx) => {
|
data.forEach((row, idx) => {
|
||||||
if (idx === 0) return
|
if (idx === 0) return
|
||||||
bp.resources[label].data[fmt(row).id] = fmt(row)
|
output[fmt(row).id] = fmt(row)
|
||||||
})
|
})
|
||||||
|
|
||||||
return bp
|
return output
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,41 +1,21 @@
|
|||||||
import R from 'ramda'
|
|
||||||
import { fmtObj } from '../lib/util'
|
import { fmtObj } from '../lib/util'
|
||||||
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* rows - generate a Blueprint from a data sheet by row. The resource name
|
* Each resource item is an object with values labelled according
|
||||||
* defaults to 'rows', or a custom resource name can be passed. Each resource
|
* to column names specified in the sheet's first row.
|
||||||
* item is an object with values labelled according to column names.
|
|
||||||
*
|
*
|
||||||
* @param {type} data list of lists representing sheet data.
|
* @param {type} data list of lists representing sheet data.
|
||||||
* @param {type} label="rows" name of resource in blueprint.
|
* @return {type} Array the structured data.
|
||||||
* @param {type} name="" name of blueprint.
|
|
||||||
* @return {type} Blueprint
|
|
||||||
*/
|
*/
|
||||||
export default function rows (
|
export default (data) => {
|
||||||
tabName,
|
|
||||||
sheetName,
|
|
||||||
sheetId,
|
|
||||||
data,
|
|
||||||
label = 'rows'
|
|
||||||
) {
|
|
||||||
// Define Blueprint
|
|
||||||
const bp = R.clone(defaultBlueprint)
|
|
||||||
bp.sheet = {
|
|
||||||
name: sheetName,
|
|
||||||
id: sheetId
|
|
||||||
}
|
|
||||||
bp.name = tabName
|
|
||||||
|
|
||||||
// Column names define resources
|
|
||||||
const itemLabels = data[0]
|
const itemLabels = data[0]
|
||||||
const fmt = fmtObj(itemLabels)
|
const fmt = fmtObj(itemLabels)
|
||||||
bp.resources[label] = R.clone(defaultResource)
|
const output = []
|
||||||
bp.resources[label].data = []
|
|
||||||
|
|
||||||
data.forEach((row, idx) => {
|
data.forEach((row, idx) => {
|
||||||
if (idx === 0) return
|
if (idx === 0) return
|
||||||
bp.resources[label].data.push(fmt(row))
|
output.push(fmt(row))
|
||||||
})
|
})
|
||||||
return bp
|
|
||||||
|
return output
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,36 +1,10 @@
|
|||||||
import R from 'ramda'
|
|
||||||
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tree - generate a Blueprint from a data sheet grouped by a column called 'group'
|
* Each resource item is inserted into a tree. TODO: describe layout.
|
||||||
* The resource name defaults to 'groups', or a custom resource name can be passed.
|
|
||||||
* Each resource item is an object with values labelled according to column
|
|
||||||
* names. Items are inserted in the data list at idx = id.
|
|
||||||
*
|
*
|
||||||
* @param {type} data list of lists representing sheet data.
|
* @param {type} data list of lists representing sheet data.
|
||||||
* @param {type} label="groups" name of resource in blueprint.
|
* @return {type} Array the structured data.
|
||||||
* @param {type} name="" name of blueprint.
|
|
||||||
* @return {type} Blueprint
|
|
||||||
*/
|
*/
|
||||||
export default function tree (
|
export default (data) => {
|
||||||
tabName,
|
|
||||||
sheetName,
|
|
||||||
sheetId,
|
|
||||||
data,
|
|
||||||
label = 'tree'
|
|
||||||
) {
|
|
||||||
// Define Blueprint
|
|
||||||
const bp = R.clone(defaultBlueprint)
|
|
||||||
bp.sheet = {
|
|
||||||
name: sheetName,
|
|
||||||
id: sheetId
|
|
||||||
}
|
|
||||||
bp.name = tabName
|
|
||||||
|
|
||||||
// Column names define resources
|
|
||||||
bp.resources[label] = R.clone(defaultResource)
|
|
||||||
bp.resources[label].data = {}
|
|
||||||
|
|
||||||
const tree = {
|
const tree = {
|
||||||
key: 'tags',
|
key: 'tags',
|
||||||
children: {}
|
children: {}
|
||||||
@@ -62,6 +36,5 @@ export default function tree (
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
bp.resources[label].data = tree
|
return tree
|
||||||
return bp
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,10 +10,11 @@ export default {
|
|||||||
name: 'example',
|
name: 'example',
|
||||||
id: '1UC7DkCFeUXHfpUxUGruExwFbP4pqVBdJLOKfo6wDDGk',
|
id: '1UC7DkCFeUXHfpUxUGruExwFbP4pqVBdJLOKfo6wDDGk',
|
||||||
tabs: {
|
tabs: {
|
||||||
export_events: [BP.byId, BP.byRow],
|
export_events: [BP.deeprows, BP.rows],
|
||||||
export_categories: [BP.byGroup, BP.byRow],
|
export_categories: [BP.groups, BP.rows],
|
||||||
export_sites: BP.byRow,
|
export_sources: BP.ids,
|
||||||
export_tags: BP.byTree
|
export_sites: BP.rows,
|
||||||
|
export_tags: BP.tree
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -91,9 +91,9 @@ class Fetcher {
|
|||||||
*/
|
*/
|
||||||
_saveViaBlueprinter (tab, data, blueprinter) {
|
_saveViaBlueprinter (tab, data, blueprinter) {
|
||||||
const saturatedBp = blueprinter(
|
const saturatedBp = blueprinter(
|
||||||
tab,
|
|
||||||
this.sheetName,
|
|
||||||
this.sheetId,
|
this.sheetId,
|
||||||
|
this.sheetName,
|
||||||
|
tab,
|
||||||
data
|
data
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -24,13 +24,27 @@ export function buildDesaturated (sheetId, sheetName, tab, resource) {
|
|||||||
return bp
|
return bp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Turn a datafier (a function mapping raw sheet data to structured data)
 * into a blueprinter. Curried, so the first two arguments can be supplied
 * up front and the sheet-specific ones later.
 *
 * @param {string} datafierName - key under which the resource is stored.
 * @param {Function} datafier - called with `data`; its result becomes the
 *   resource's `data`.
 * @param {*} sheetId - stored as `sheet.id`.
 * @param {*} sheetName - stored as `sheet.name`.
 * @param {*} tabName - stored as the Blueprint's `name`.
 * @param {*} data - raw sheet data handed to the datafier.
 * @return {Object} a clone of defaultBlueprint holding a single resource.
 */
const buildBlueprinter = R.curry(
  (datafierName, datafier, sheetId, sheetName, tabName, data) => {
    const blueprint = R.clone(defaultBlueprint)
    blueprint.sheet = { name: sheetName, id: sheetId }
    blueprint.name = tabName

    // fresh resource per blueprint so the shared default is never mutated
    const resource = R.clone(defaultResource)
    blueprint.resources[datafierName] = resource
    resource.data = datafier(data)

    return blueprint
  }
)
|
||||||
|
|
||||||
// import all default exports from 'blueprinters' folder
|
// import all default exports from 'blueprinters' folder
|
||||||
const allBps = {}
|
const allBps = {}
|
||||||
const REL_PATH_TO_BPS = '../blueprinters'
|
const REL_PATH_TO_BPS = '../blueprinters'
|
||||||
const normalizedPath = path.join(__dirname, REL_PATH_TO_BPS)
|
const normalizedPath = path.join(__dirname, REL_PATH_TO_BPS)
|
||||||
fs.readdirSync(normalizedPath).forEach(file => {
|
fs.readdirSync(normalizedPath).forEach(file => {
|
||||||
const bpName = file.replace('.js', '')
|
const bpName = file.replace('.js', '')
|
||||||
allBps[bpName] = require(`${REL_PATH_TO_BPS}/${file}`).default
|
const datafier = require(`${REL_PATH_TO_BPS}/${file}`).default
|
||||||
|
allBps[bpName] = buildBlueprinter(bpName, datafier)
|
||||||
})
|
})
|
||||||
|
|
||||||
// NB: revert to ES5 'module.exports' required to make blueprinters from
|
// NB: revert to ES5 'module.exports' required to make blueprinters from
|
||||||
|
|||||||
@@ -3,10 +3,11 @@ import R from 'ramda'
|
|||||||
import {
|
import {
|
||||||
defaultBlueprint,
|
defaultBlueprint,
|
||||||
defaultResource,
|
defaultResource,
|
||||||
columns,
|
|
||||||
rows
|
|
||||||
} from '../src/lib/blueprinters'
|
} from '../src/lib/blueprinters'
|
||||||
|
|
||||||
|
import rows from '../src/blueprinters/rows'
|
||||||
|
import deeprows from '../src/blueprinters/deeprows'
|
||||||
|
|
||||||
const egInput1 = [
|
const egInput1 = [
|
||||||
['h1', 'h2', 'h3'],
|
['h1', 'h2', 'h3'],
|
||||||
[1, 2, 3],
|
[1, 2, 3],
|
||||||
@@ -25,41 +26,20 @@ test('defaultBlueprint exports', t => {
|
|||||||
t.deepEqual(expected, defaultBlueprint)
|
t.deepEqual(expected, defaultBlueprint)
|
||||||
})
|
})
|
||||||
|
|
||||||
test('columns blueprinter generates expected output', t => {
|
test('rows blueprinter', t => {
|
||||||
const actual = columns('eg ColumnBlueprint', 'egSheetName', 'egSheetId', egInput1)
|
const expected = [
|
||||||
const expected = R.clone(defaultBlueprint)
|
{ h1: 1, h2: 2, h3: 3 },
|
||||||
expected.name = 'eg ColumnBlueprint'
|
{ h1: 4, h2: 5, h3: 6 },
|
||||||
expected.sheet = {
|
]
|
||||||
id: 'egSheetId',
|
const actual = rows(egInput1)
|
||||||
name: 'egSheetName'
|
|
||||||
}
|
|
||||||
expected.resources['h1'] = R.clone(defaultResource)
|
|
||||||
expected.resources['h1'].data = [1, 4]
|
|
||||||
expected.resources['h2'] = R.clone(defaultResource)
|
|
||||||
expected.resources['h2'].data = [2, 5]
|
|
||||||
expected.resources['h3'] = R.clone(defaultResource)
|
|
||||||
expected.resources['h3'].data = [3, 6]
|
|
||||||
t.deepEqual(expected, actual)
|
t.deepEqual(expected, actual)
|
||||||
})
|
})
|
||||||
|
|
||||||
test('rows blueprinter generates expected output', t => {
|
test('deeprows blueprinter', t => {
|
||||||
const actual = rows('egRowBlueprint', 'egSheetName', 'egSheetId', egInput1, 'items')
|
const expected = [
|
||||||
const expected = R.clone(defaultBlueprint)
|
{ 'hs': [1,2,3] },
|
||||||
expected.name = 'egRowBlueprint'
|
{ 'hs': [4,5,6] }
|
||||||
expected.sheet = {
|
]
|
||||||
id: 'egSheetId',
|
const actual = deeprows(egInput1)
|
||||||
name: 'egSheetName'
|
|
||||||
}
|
|
||||||
expected.resources['items'] = R.clone(defaultResource)
|
|
||||||
expected.resources['items'].data = [{
|
|
||||||
h1: 1,
|
|
||||||
h2: 2,
|
|
||||||
h3: 3
|
|
||||||
},
|
|
||||||
{
|
|
||||||
h1: 4,
|
|
||||||
h2: 5,
|
|
||||||
h3: 6
|
|
||||||
}]
|
|
||||||
t.deepEqual(expected, actual)
|
t.deepEqual(expected, actual)
|
||||||
})
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user