mirror of
https://github.com/bellingcat/datasheet-server.git
synced 2026-06-10 12:28:34 +03:00
Merge pull request #29 from forensic-architecture/topic/add-deeprows
Topic/add deeprows
This commit is contained in:
@@ -1,37 +0,0 @@
|
||||
import R from 'ramda'
|
||||
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
|
||||
|
||||
/**
 * columns - generate a Blueprint from a data sheet by column. Each column
 * name is a resource, and all values in that column are the resource items.
 *
 * @param {string} tabName - stored as the Blueprint's `name`.
 * @param {string} sheetName - stored as the Blueprint's `sheet.name`.
 * @param {string} sheetId - stored as the Blueprint's `sheet.id`.
 * @param {Array<Array>} data - list of lists representing sheet data. The
 *   first row holds the column labels; every later row holds values.
 * @return {Object} Blueprint generated. An empty `data` list yields a
 *   Blueprint to which no column resources have been added.
 */
function columns (tabName, sheetName, sheetId, data) {
  // Define Blueprint props
  const bp = R.clone(defaultBlueprint)
  bp.sheet = {
    name: sheetName,
    id: sheetId
  }
  bp.name = tabName

  // An empty sheet has no header row: fall back to "no labels" rather than
  // throwing when iterating an undefined first row.
  const [labels = [], ...rows] = data

  // column names define resources
  labels.forEach(label => {
    bp.resources[label] = R.clone(defaultResource)
  })

  // remaining rows as data: the value at column position N of each row is
  // appended to the resource named by the label at position N
  rows.forEach(row => {
    labels.forEach((label, colIdx) => {
      bp.resources[label].data.push(row[colIdx])
    })
  })

  return bp
}

export default columns
|
||||
71
src/blueprinters/deeprows.js
Normal file
71
src/blueprinters/deeprows.js
Normal file
@@ -0,0 +1,71 @@
|
||||
import { fmtObj } from '../lib/util'
|
||||
|
||||
// Captures the label text that precedes a trailing run of digits. The prefix
// is matched lazily so the WHOLE digit run is stripped ('tag10' -> 'tag');
// a greedy prefix would swallow all but the last digit ('tag10' -> 'tag1')
// and split 'tag10' away from 'tag1'..'tag9'.
const TRAILING_INTEGER = /(.*?)[0-9]+$/

/**
 * Each resource item is an object with values labelled according
 * to column names specified in the sheet's first row. If two or more
 * column names are the same except for a different integer at the end
 * (e.g. 'tag1', and 'tag2'), then the values of those columns are
 * aggregated into a list, which is stored under the shared prefix with an
 * 's' appended ('tags'). A single numbered column is aggregated the same
 * way, into a one-column list.
 * Any values in those columns that are empty ('' or missing) will NOT be
 * added to the list.
 *
 * @param {Array<Array>} data list of lists representing sheet data; the
 *   first row holds the column names.
 * @return {Array<Object>} the structured data, one object per data row.
 *   Empty when `data` has no rows at all or only a header row.
 */
const deeprows = (data) => {
  // no header row means there is nothing to label or aggregate
  if (data.length === 0) return []

  const [itemLabels, ...rows] = data
  const baseFmt = fmtObj(itemLabels)

  // work out which columns need to be aggregated: `aggregates` maps each new
  // (pluralised) label to the original numbered labels feeding it, while
  // `flatLabels` lists the columns that are copied over unchanged
  const flatLabels = []
  const aggregates = new Map()
  itemLabels.forEach(label => {
    const matches = label.match(TRAILING_INTEGER)
    if (!matches) {
      flatLabels.push(label)
      return
    }
    const newLabel = `${matches[1]}s`
    if (aggregates.has(newLabel)) {
      aggregates.get(newLabel).push(label)
    } else {
      aggregates.set(newLabel, [label])
    }
  })

  return rows.map(row => {
    const baseRow = baseFmt(row)
    const deepRow = {}

    // generate the value for each aggregated label
    aggregates.forEach((oldLabels, newLabel) => {
      // only keep non-empty values. NOTE(review): rows shorter than the
      // header presumably surface as undefined from fmtObj -- those are
      // treated as empty too; confirm against fmtObj.
      deepRow[newLabel] = oldLabels
        .map(l => baseRow[l])
        .filter(vl => vl !== '' && vl != null)
    })

    // move values for flat labels over from base
    flatLabels.forEach(label => {
      deepRow[label] = baseRow[label]
    })

    return deepRow
  })
}

export default deeprows
|
||||
@@ -1,39 +1,16 @@
|
||||
import R from 'ramda'
|
||||
import { fmtObj } from '../lib/util'
|
||||
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
|
||||
|
||||
/**
|
||||
* groups - generate a Blueprint from a data sheet grouped by a column called 'group'
|
||||
* The resource name defaults to 'groups', or a custom resource name can be passed.
|
||||
* Each resource item is an object with values labelled according to column
|
||||
* names. Items are inserted in the data list at idx = id.
|
||||
* names. Items are inserted into the data list at idx = id.
|
||||
*
|
||||
* @param {type} data list of lists representing sheet data.
|
||||
* @param {type} label="groups" name of resource in blueprint.
|
||||
* @param {type} name="" name of blueprint.
|
||||
* @return {type} Blueprint
|
||||
* @param {type} data list of lists representing sheet data.
|
||||
* @return {type} Array the structured data.
|
||||
*/
|
||||
export default function groups (
|
||||
tabName,
|
||||
sheetName,
|
||||
sheetId,
|
||||
data,
|
||||
label = 'groups'
|
||||
) {
|
||||
// Define Blueprint
|
||||
const bp = R.clone(defaultBlueprint)
|
||||
bp.sheet = {
|
||||
name: sheetName,
|
||||
id: sheetId
|
||||
}
|
||||
bp.name = tabName
|
||||
|
||||
// Column names define resources
|
||||
export default (data) => {
|
||||
const itemLabels = data[0]
|
||||
const fmt = fmtObj(itemLabels)
|
||||
bp.resources[label] = R.clone(defaultResource)
|
||||
bp.resources[label].data = []
|
||||
|
||||
const output = []
|
||||
const dataGroups = {}
|
||||
|
||||
data.forEach((row, idx) => {
|
||||
@@ -45,12 +22,14 @@ export default function groups (
|
||||
dataGroups[group].push(fmt(row))
|
||||
}
|
||||
})
|
||||
Object.keys(dataGroups).forEach(groupKey => {
|
||||
bp.resources[label].data.push({
|
||||
group: groupKey,
|
||||
group_label: dataGroups[groupKey][0].group_label,
|
||||
data: dataGroups[groupKey]
|
||||
Object.keys(dataGroups)
|
||||
.forEach(groupKey => {
|
||||
output.push({
|
||||
group: groupKey,
|
||||
group_label: dataGroups[groupKey][0].group_label,
|
||||
data: dataGroups[groupKey]
|
||||
})
|
||||
})
|
||||
})
|
||||
return bp
|
||||
|
||||
return output
|
||||
}
|
||||
|
||||
@@ -1,43 +1,22 @@
|
||||
import R from 'ramda'
|
||||
import { fmtObj } from '../lib/util'
|
||||
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
|
||||
|
||||
/**
|
||||
* ids - generate a Blueprint from a data sheet by id, which is an integer.
|
||||
* The resource name defaults to 'ids', or a custom resource name can be passed.
|
||||
* Each resource item is an object with values labelled according to column
|
||||
* names. Items are inserted in the data list at idx = id.
|
||||
* Very similar to the rows blueprinter, but inserts each row as a value in
|
||||
* an object, where the value in the 'id' column of the row will be used as
|
||||
* the search key
|
||||
*
|
||||
* @param {type} data list of lists representing sheet data.
|
||||
* @param {type} label="ids" name of resource in blueprint.
|
||||
* @param {type} name="" name of blueprint.
|
||||
* @return {type} Blueprint
|
||||
* @param {type} data list of lists representing sheet data.
|
||||
* @return {type} Object the structured data.
|
||||
*/
|
||||
export default function ids (
|
||||
tabName,
|
||||
sheetName,
|
||||
sheetId,
|
||||
data,
|
||||
label = 'ids'
|
||||
) {
|
||||
// Define Blueprint
|
||||
const bp = R.clone(defaultBlueprint)
|
||||
bp.sheet = {
|
||||
name: sheetName,
|
||||
id: sheetId
|
||||
}
|
||||
bp.name = tabName
|
||||
|
||||
// Column names define resources
|
||||
export default (data) => {
|
||||
const itemLabels = data[0]
|
||||
const fmt = fmtObj(itemLabels)
|
||||
bp.resources[label] = R.clone(defaultResource)
|
||||
bp.resources[label].data = {}
|
||||
const output = {}
|
||||
|
||||
data.forEach((row, idx) => {
|
||||
if (idx === 0) return
|
||||
bp.resources[label].data[fmt(row).id] = fmt(row)
|
||||
output[fmt(row).id] = fmt(row)
|
||||
})
|
||||
|
||||
return bp
|
||||
return output
|
||||
}
|
||||
|
||||
@@ -1,41 +1,21 @@
|
||||
import R from 'ramda'
|
||||
import { fmtObj } from '../lib/util'
|
||||
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
|
||||
|
||||
/**
|
||||
* rows - generate a Blueprint from a data sheet by row. The resource name
|
||||
* defaults to 'rows', or a custom resource name can be passed. Each resource
|
||||
* item is an object with values labelled according to column names.
|
||||
* Each resource item is an object with values labelled according
|
||||
* to column names specified in the sheet's first row.
|
||||
*
|
||||
* @param {type} data list of lists representing sheet data.
|
||||
* @param {type} label="rows" name of resource in blueprint.
|
||||
* @param {type} name="" name of blueprint.
|
||||
* @return {type} Blueprint
|
||||
* @param {type} data list of lists representing sheet data.
|
||||
* @return {type} Array the structured data.
|
||||
*/
|
||||
export default function rows (
|
||||
tabName,
|
||||
sheetName,
|
||||
sheetId,
|
||||
data,
|
||||
label = 'rows'
|
||||
) {
|
||||
// Define Blueprint
|
||||
const bp = R.clone(defaultBlueprint)
|
||||
bp.sheet = {
|
||||
name: sheetName,
|
||||
id: sheetId
|
||||
}
|
||||
bp.name = tabName
|
||||
|
||||
// Column names define resources
|
||||
export default (data) => {
|
||||
const itemLabels = data[0]
|
||||
const fmt = fmtObj(itemLabels)
|
||||
bp.resources[label] = R.clone(defaultResource)
|
||||
bp.resources[label].data = []
|
||||
const output = []
|
||||
|
||||
data.forEach((row, idx) => {
|
||||
if (idx === 0) return
|
||||
bp.resources[label].data.push(fmt(row))
|
||||
output.push(fmt(row))
|
||||
})
|
||||
return bp
|
||||
|
||||
return output
|
||||
}
|
||||
|
||||
@@ -1,36 +1,10 @@
|
||||
import R from 'ramda'
|
||||
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
|
||||
|
||||
/**
|
||||
* tree - generate a Blueprint from a data sheet grouped by a column called 'group'
|
||||
* The resource name defaults to 'groups', or a custom resource name can be passed.
|
||||
* Each resource item is an object with values labelled according to column
|
||||
* names. Items are inserted in the data list at idx = id.
|
||||
* Each resource item is inserted into a tree. TODO: describe layout.
|
||||
*
|
||||
* @param {type} data list of lists representing sheet data.
|
||||
* @param {type} label="groups" name of resource in blueprint.
|
||||
* @param {type} name="" name of blueprint.
|
||||
* @return {type} Blueprint
|
||||
* @param {type} data list of lists representing sheet data.
|
||||
* @return {type} Array the structured data.
|
||||
*/
|
||||
export default function tree (
|
||||
tabName,
|
||||
sheetName,
|
||||
sheetId,
|
||||
data,
|
||||
label = 'tree'
|
||||
) {
|
||||
// Define Blueprint
|
||||
const bp = R.clone(defaultBlueprint)
|
||||
bp.sheet = {
|
||||
name: sheetName,
|
||||
id: sheetId
|
||||
}
|
||||
bp.name = tabName
|
||||
|
||||
// Column names define resources
|
||||
bp.resources[label] = R.clone(defaultResource)
|
||||
bp.resources[label].data = {}
|
||||
|
||||
export default (data) => {
|
||||
const tree = {
|
||||
key: 'tags',
|
||||
children: {}
|
||||
@@ -62,6 +36,5 @@ export default function tree (
|
||||
}
|
||||
})
|
||||
|
||||
bp.resources[label].data = tree
|
||||
return bp
|
||||
return tree
|
||||
}
|
||||
|
||||
@@ -10,10 +10,11 @@ export default {
|
||||
name: 'example',
|
||||
id: '1UC7DkCFeUXHfpUxUGruExwFbP4pqVBdJLOKfo6wDDGk',
|
||||
tabs: {
|
||||
export_events: [BP.byId, BP.byRow],
|
||||
export_categories: [BP.byGroup, BP.byRow],
|
||||
export_sites: BP.byRow,
|
||||
export_tags: BP.byTree
|
||||
export_events: [BP.deeprows, BP.rows],
|
||||
export_categories: [BP.groups, BP.rows],
|
||||
export_sources: BP.ids,
|
||||
export_sites: BP.rows,
|
||||
export_tags: BP.tree
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -91,9 +91,9 @@ class Fetcher {
|
||||
*/
|
||||
_saveViaBlueprinter (tab, data, blueprinter) {
|
||||
const saturatedBp = blueprinter(
|
||||
tab,
|
||||
this.sheetName,
|
||||
this.sheetId,
|
||||
this.sheetName,
|
||||
tab,
|
||||
data
|
||||
)
|
||||
|
||||
|
||||
@@ -24,13 +24,27 @@ export function buildDesaturated (sheetId, sheetName, tab, resource) {
|
||||
return bp
|
||||
}
|
||||
|
||||
/**
 * Wraps a "datafier" (a function that turns raw sheet data into structured
 * data) into a function that returns a full Blueprint.
 *
 * The function is curried with R.curry, so it can be supplied with only
 * (datafierName, datafier) up front and the remaining arguments later.
 *
 * @param {string} datafierName key under which the resource is stored in
 *   the Blueprint's `resources`.
 * @param {Function} datafier called with `data`; its return value becomes
 *   the resource's `data`.
 * @param {*} sheetId stored as `sheet.id`.
 * @param {*} sheetName stored as `sheet.name`.
 * @param {*} tabName stored as the Blueprint's `name`.
 * @param {*} data passed through to `datafier` unchanged.
 * @return {Object} the populated clone of defaultBlueprint.
 */
const buildBlueprinter = R.curry((datafierName, datafier, sheetId, sheetName, tabName, data) => {
|
||||
// start from a clone so the shared defaultBlueprint is not mutated
const bp = R.clone(defaultBlueprint)
|
||||
bp.sheet = {
|
||||
name: sheetName,
|
||||
id: sheetId
|
||||
}
|
||||
bp.name = tabName
|
||||
// a single resource, named after the datafier, holds the datafier's output
bp.resources[datafierName] = R.clone(defaultResource)
|
||||
bp.resources[datafierName].data = datafier(data)
|
||||
|
||||
return bp
|
||||
})
|
||||
|
||||
// import all default exports from 'blueprinters' folder
|
||||
const allBps = {}
|
||||
const REL_PATH_TO_BPS = '../blueprinters'
|
||||
const normalizedPath = path.join(__dirname, REL_PATH_TO_BPS)
|
||||
fs.readdirSync(normalizedPath).forEach(file => {
|
||||
const bpName = file.replace('.js', '')
|
||||
allBps[bpName] = require(`${REL_PATH_TO_BPS}/${file}`).default
|
||||
const datafier = require(`${REL_PATH_TO_BPS}/${file}`).default
|
||||
allBps[bpName] = buildBlueprinter(bpName, datafier)
|
||||
})
|
||||
|
||||
// NB: revert to ES5 'module.exports' required to make blueprinters from
|
||||
|
||||
@@ -3,10 +3,11 @@ import R from 'ramda'
|
||||
import {
|
||||
defaultBlueprint,
|
||||
defaultResource,
|
||||
columns,
|
||||
rows
|
||||
} from '../src/lib/blueprinters'
|
||||
|
||||
import rows from '../src/blueprinters/rows'
|
||||
import deeprows from '../src/blueprinters/deeprows'
|
||||
|
||||
const egInput1 = [
|
||||
['h1', 'h2', 'h3'],
|
||||
[1, 2, 3],
|
||||
@@ -25,41 +26,20 @@ test('defaultBlueprint exports', t => {
|
||||
t.deepEqual(expected, defaultBlueprint)
|
||||
})
|
||||
|
||||
test('columns blueprinter generates expected output', t => {
|
||||
const actual = columns('eg ColumnBlueprint', 'egSheetName', 'egSheetId', egInput1)
|
||||
const expected = R.clone(defaultBlueprint)
|
||||
expected.name = 'eg ColumnBlueprint'
|
||||
expected.sheet = {
|
||||
id: 'egSheetId',
|
||||
name: 'egSheetName'
|
||||
}
|
||||
expected.resources['h1'] = R.clone(defaultResource)
|
||||
expected.resources['h1'].data = [1, 4]
|
||||
expected.resources['h2'] = R.clone(defaultResource)
|
||||
expected.resources['h2'].data = [2, 5]
|
||||
expected.resources['h3'] = R.clone(defaultResource)
|
||||
expected.resources['h3'].data = [3, 6]
|
||||
test('rows blueprinter', t => {
|
||||
const expected = [
|
||||
{ h1: 1, h2: 2, h3: 3 },
|
||||
{ h1: 4, h2: 5, h3: 6 },
|
||||
]
|
||||
const actual = rows(egInput1)
|
||||
t.deepEqual(expected, actual)
|
||||
})
|
||||
|
||||
test('rows blueprinter generates expected output', t => {
|
||||
const actual = rows('egRowBlueprint', 'egSheetName', 'egSheetId', egInput1, 'items')
|
||||
const expected = R.clone(defaultBlueprint)
|
||||
expected.name = 'egRowBlueprint'
|
||||
expected.sheet = {
|
||||
id: 'egSheetId',
|
||||
name: 'egSheetName'
|
||||
}
|
||||
expected.resources['items'] = R.clone(defaultResource)
|
||||
expected.resources['items'].data = [{
|
||||
h1: 1,
|
||||
h2: 2,
|
||||
h3: 3
|
||||
},
|
||||
{
|
||||
h1: 4,
|
||||
h2: 5,
|
||||
h3: 6
|
||||
}]
|
||||
test('deeprows blueprinter', t => {
|
||||
const expected = [
|
||||
{ 'hs': [1,2,3] },
|
||||
{ 'hs': [4,5,6] }
|
||||
]
|
||||
const actual = deeprows(egInput1)
|
||||
t.deepEqual(expected, actual)
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user