Merge pull request #29 from forensic-architecture/topic/add-deeprows

Topic/add deeprows
This commit is contained in:
Lachlan Kermode
2018-12-14 13:20:20 +00:00
committed by GitHub
10 changed files with 145 additions and 205 deletions

View File

@@ -1,37 +0,0 @@
import R from 'ramda'
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
/**
 * columns - generate a Blueprint from sheet data, one resource per column.
 * The first row of `data` supplies the resource names; every later row
 * contributes one value to each of those resources, in row order.
 *
 * @param {string} tabName - stored as the blueprint's `name`.
 * @param {string} sheetName - stored as `sheet.name` on the blueprint.
 * @param {string} sheetId - stored as `sheet.id` on the blueprint.
 * @param {Array<Array>} data - list of lists representing sheet data.
 * @return {Object} the populated Blueprint.
 */
function columns (tabName, sheetName, sheetId, data) {
  // Start from a private copy so the shared default is never mutated
  const blueprint = R.clone(defaultBlueprint)
  blueprint.sheet = { name: sheetName, id: sheetId }
  blueprint.name = tabName

  // Header row: one empty resource per column name
  const header = data[0]
  header.forEach(columnName => {
    blueprint.resources[columnName] = R.clone(defaultResource)
  })

  // Body rows: append each cell to the resource named by its column
  data.slice(1).forEach(cells => {
    header.forEach((columnName, columnIdx) => {
      blueprint.resources[columnName].data.push(cells[columnIdx])
    })
  })

  return blueprint
}
export default columns

View File

@@ -0,0 +1,71 @@
import { fmtObj } from '../lib/util'
/**
 * deeprows - structure sheet data by row, folding numbered columns into lists.
 *
 * Each item is an object with values labelled according to the column names
 * in the sheet's first row. A column whose name is some text followed by an
 * integer (e.g. 'tag1', 'tag2', 'tag10') is grouped with every other column
 * sharing that text, and the group is emitted as a single list under the
 * text plus 's' ('tags'), in column order. Empty cells are NOT added to the
 * list. Column names that do not end in an integer, or that consist only of
 * digits, are copied across unchanged.
 *
 * @param {Array<Array>} data list of lists representing sheet data; the
 *   first row holds the column names.
 * @return {Array<Object>} the structured data, one object per data row.
 */
export default (data) => {
  const itemLabels = data[0]
  const baseFmt = fmtObj(itemLabels)

  // The captured prefix must end in a non-digit so that the WHOLE trailing
  // integer is stripped. A greedy '(.*)' would keep all but the last digit,
  // turning 'tag10' into 'tag1s' instead of grouping it under 'tags'.
  const endsWithNumber = /(.*[^0-9])[0-9]+$/

  // Work out once which columns stay flat and which are aggregated.
  // A Map keeps the groups in first-seen order and cannot clash with a
  // column name the way a sentinel key on a plain object could.
  const flatLabels = []
  const groupedLabels = new Map()
  itemLabels.forEach(label => {
    const matches = label.match(endsWithNumber)
    if (!matches) {
      flatLabels.push(label)
      return
    }
    const groupKey = `${matches[1]}s`
    if (groupedLabels.has(groupKey)) {
      groupedLabels.get(groupKey).push(label)
    } else {
      groupedLabels.set(groupKey, [ label ])
    }
  })

  // Every row after the header becomes one output object
  return data.slice(1).map(row => {
    const baseRow = baseFmt(row)
    const deepRow = {}

    // Aggregated columns first. Only non-empty values are kept.
    // NOTE(review): a cell missing from a short row presumably surfaces
    // from fmtObj as undefined rather than '' - treated as empty here.
    groupedLabels.forEach((labels, groupKey) => {
      deepRow[groupKey] = labels
        .map(label => baseRow[label])
        .filter(value => value !== '' && value !== null && value !== undefined)
    })

    // Then move values for flat labels over from base
    flatLabels.forEach(label => {
      deepRow[label] = baseRow[label]
    })

    return deepRow
  })
}

View File

@@ -1,39 +1,16 @@
import R from 'ramda'
import { fmtObj } from '../lib/util'
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
/**
* groups - generate a Blueprint from a data sheet grouped by a column called 'group'
* The resource name defaults to 'groups', or a custom resource name can be passed.
* Each resource item is an object with values labelled according to column
* names. Items are inserted in the data list at idx = id.
* names. Items are inserted into the data list at idx = id.
*
* @param {type} data list of lists representing sheet data.
* @param {type} label="groups" name of resource in blueprint.
* @param {type} name="" name of blueprint.
* @return {type} Blueprint
* @param {type} data list of lists representing sheet data.
* @return {type} Array the structured data.
*/
export default function groups (
tabName,
sheetName,
sheetId,
data,
label = 'groups'
) {
// Define Blueprint
const bp = R.clone(defaultBlueprint)
bp.sheet = {
name: sheetName,
id: sheetId
}
bp.name = tabName
// Column names define resources
export default (data) => {
const itemLabels = data[0]
const fmt = fmtObj(itemLabels)
bp.resources[label] = R.clone(defaultResource)
bp.resources[label].data = []
const output = []
const dataGroups = {}
data.forEach((row, idx) => {
@@ -45,12 +22,14 @@ export default function groups (
dataGroups[group].push(fmt(row))
}
})
Object.keys(dataGroups).forEach(groupKey => {
bp.resources[label].data.push({
group: groupKey,
group_label: dataGroups[groupKey][0].group_label,
data: dataGroups[groupKey]
Object.keys(dataGroups)
.forEach(groupKey => {
output.push({
group: groupKey,
group_label: dataGroups[groupKey][0].group_label,
data: dataGroups[groupKey]
})
})
})
return bp
return output
}

View File

@@ -1,43 +1,22 @@
import R from 'ramda'
import { fmtObj } from '../lib/util'
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
/**
* ids - generate a Blueprint from a data sheet by id, which is an integer.
* The resource name defaults to 'ids', or a custom resource name can be passed.
* Each resource item is an object with values labelled according to column
* names. Items are inserted in the data list at idx = id.
* Very similar to the rows blueprinter, but inserts each row as a value in
* an object, where the value in the 'id' column of the row will be used as
* the search key
*
* @param {type} data list of lists representing sheet data.
* @param {type} label="ids" name of resource in blueprint.
* @param {type} name="" name of blueprint.
* @return {type} Blueprint
* @param {type} data list of lists representing sheet data.
* @return {type} Object the structured data.
*/
export default function ids (
tabName,
sheetName,
sheetId,
data,
label = 'ids'
) {
// Define Blueprint
const bp = R.clone(defaultBlueprint)
bp.sheet = {
name: sheetName,
id: sheetId
}
bp.name = tabName
// Column names define resources
export default (data) => {
const itemLabels = data[0]
const fmt = fmtObj(itemLabels)
bp.resources[label] = R.clone(defaultResource)
bp.resources[label].data = {}
const output = {}
data.forEach((row, idx) => {
if (idx === 0) return
bp.resources[label].data[fmt(row).id] = fmt(row)
output[fmt(row).id] = fmt(row)
})
return bp
return output
}

View File

@@ -1,41 +1,21 @@
import R from 'ramda'
import { fmtObj } from '../lib/util'
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
/**
* rows - generate a Blueprint from a data sheet by row. The resource name
* defaults to 'rows', or a custom resource name can be passed. Each resource
* item is an object with values labelled according to column names.
* Each resource item is an object with values labelled according
* to column names specified in the sheet's first row.
*
* @param {type} data list of lists representing sheet data.
* @param {type} label="rows" name of resource in blueprint.
* @param {type} name="" name of blueprint.
* @return {type} Blueprint
* @param {type} data list of lists representing sheet data.
* @return {type} Array the structured data.
*/
export default function rows (
tabName,
sheetName,
sheetId,
data,
label = 'rows'
) {
// Define Blueprint
const bp = R.clone(defaultBlueprint)
bp.sheet = {
name: sheetName,
id: sheetId
}
bp.name = tabName
// Column names define resources
export default (data) => {
const itemLabels = data[0]
const fmt = fmtObj(itemLabels)
bp.resources[label] = R.clone(defaultResource)
bp.resources[label].data = []
const output = []
data.forEach((row, idx) => {
if (idx === 0) return
bp.resources[label].data.push(fmt(row))
output.push(fmt(row))
})
return bp
return output
}

View File

@@ -1,36 +1,10 @@
import R from 'ramda'
import { defaultBlueprint, defaultResource } from '../lib/blueprinters'
/**
* tree - generate a Blueprint from a data sheet grouped by a column called 'group'
* The resource name defaults to 'groups', or a custom resource name can be passed.
* Each resource item is an object with values labelled according to column
* names. Items are inserted in the data list at idx = id.
* Each resource item is inserted into a tree. TODO: describe layout.
*
* @param {type} data list of lists representing sheet data.
* @param {type} label="groups" name of resource in blueprint.
* @param {type} name="" name of blueprint.
* @return {type} Blueprint
* @param {type} data list of lists representing sheet data.
* @return {type} Array the structured data.
*/
export default function tree (
tabName,
sheetName,
sheetId,
data,
label = 'tree'
) {
// Define Blueprint
const bp = R.clone(defaultBlueprint)
bp.sheet = {
name: sheetName,
id: sheetId
}
bp.name = tabName
// Column names define resources
bp.resources[label] = R.clone(defaultResource)
bp.resources[label].data = {}
export default (data) => {
const tree = {
key: 'tags',
children: {}
@@ -62,6 +36,5 @@ export default function tree (
}
})
bp.resources[label].data = tree
return bp
return tree
}

View File

@@ -10,10 +10,11 @@ export default {
name: 'example',
id: '1UC7DkCFeUXHfpUxUGruExwFbP4pqVBdJLOKfo6wDDGk',
tabs: {
export_events: [BP.byId, BP.byRow],
export_categories: [BP.byGroup, BP.byRow],
export_sites: BP.byRow,
export_tags: BP.byTree
export_events: [BP.deeprows, BP.rows],
export_categories: [BP.groups, BP.rows],
export_sources: BP.ids,
export_sites: BP.rows,
export_tags: BP.tree
}
}
]

View File

@@ -91,9 +91,9 @@ class Fetcher {
*/
_saveViaBlueprinter (tab, data, blueprinter) {
const saturatedBp = blueprinter(
tab,
this.sheetName,
this.sheetId,
this.sheetName,
tab,
data
)

View File

@@ -24,13 +24,27 @@ export function buildDesaturated (sheetId, sheetName, tab, resource) {
return bp
}
/**
 * buildBlueprinter - wrap a datafier so that it produces a full Blueprint.
 * Curried, so it can be given its name and datafier up front and the sheet
 * details and data later.
 *
 * The result is a clone of the default blueprint carrying the sheet id and
 * name, the tab name, and a single resource (keyed by `datafierName`) whose
 * data is whatever `datafier(data)` returns.
 */
const buildBlueprinter = R.curry(
  (datafierName, datafier, sheetId, sheetName, tabName, data) => {
    const blueprint = R.clone(defaultBlueprint)
    blueprint.sheet = { name: sheetName, id: sheetId }
    blueprint.name = tabName

    // Fill in the resource before attaching it to the blueprint
    const resource = R.clone(defaultResource)
    resource.data = datafier(data)
    blueprint.resources[datafierName] = resource

    return blueprint
  }
)
// import all default exports from 'blueprinters' folder
const allBps = {}
const REL_PATH_TO_BPS = '../blueprinters'
const normalizedPath = path.join(__dirname, REL_PATH_TO_BPS)
fs.readdirSync(normalizedPath).forEach(file => {
const bpName = file.replace('.js', '')
allBps[bpName] = require(`${REL_PATH_TO_BPS}/${file}`).default
const datafier = require(`${REL_PATH_TO_BPS}/${file}`).default
allBps[bpName] = buildBlueprinter(bpName, datafier)
})
// NB: revert to ES5 'module.exports' required to make blueprinters from

View File

@@ -3,10 +3,11 @@ import R from 'ramda'
import {
defaultBlueprint,
defaultResource,
columns,
rows
} from '../src/lib/blueprinters'
import rows from '../src/blueprinters/rows'
import deeprows from '../src/blueprinters/deeprows'
const egInput1 = [
['h1', 'h2', 'h3'],
[1, 2, 3],
@@ -25,41 +26,20 @@ test('defaultBlueprint exports', t => {
t.deepEqual(expected, defaultBlueprint)
})
test('columns blueprinter generates expected output', t => {
const actual = columns('eg ColumnBlueprint', 'egSheetName', 'egSheetId', egInput1)
const expected = R.clone(defaultBlueprint)
expected.name = 'eg ColumnBlueprint'
expected.sheet = {
id: 'egSheetId',
name: 'egSheetName'
}
expected.resources['h1'] = R.clone(defaultResource)
expected.resources['h1'].data = [1, 4]
expected.resources['h2'] = R.clone(defaultResource)
expected.resources['h2'].data = [2, 5]
expected.resources['h3'] = R.clone(defaultResource)
expected.resources['h3'].data = [3, 6]
// rows should turn every data row into an object keyed by the header row.
test('rows blueprinter', t => {
  const expected = [
    { h1: 1, h2: 2, h3: 3 },
    { h1: 4, h2: 5, h3: 6 }
  ]
  const actual = rows(egInput1)
  // deepEqual takes (actual, expected); passing them the other way round
  // mislabels the two sides in the failure diff.
  t.deepEqual(actual, expected)
})
test('rows blueprinter generates expected output', t => {
const actual = rows('egRowBlueprint', 'egSheetName', 'egSheetId', egInput1, 'items')
const expected = R.clone(defaultBlueprint)
expected.name = 'egRowBlueprint'
expected.sheet = {
id: 'egSheetId',
name: 'egSheetName'
}
expected.resources['items'] = R.clone(defaultResource)
expected.resources['items'].data = [{
h1: 1,
h2: 2,
h3: 3
},
{
h1: 4,
h2: 5,
h3: 6
}]
// deeprows should fold the numbered columns h1, h2, h3 into one 'hs' list
// per data row.
test('deeprows blueprinter', t => {
  const expected = [
    { 'hs': [1, 2, 3] },
    { 'hs': [4, 5, 6] }
  ]
  const actual = deeprows(egInput1)
  // deepEqual takes (actual, expected); passing them the other way round
  // mislabels the two sides in the failure diff.
  t.deepEqual(actual, expected)
})