const csv = require('csv-parser');
const fs = require('fs');
const path = require('path');
const axios = require('axios');
const {Builder} = require('selenium-webdriver');  // selenium and the chrome driver must be installed and configured separately
const chrome = require('selenium-webdriver/chrome');
const pdfReader = require('pdfreader');
const iconv = require('iconv-lite');

const metaDoc = 'C:\\Users\\vle2fe\\Documents\\Data\\Rng_200707\\metadata.csv';  // metadata files
const kfDoc = 'C:\\Users\\vle2fe\\Documents\\Data\\Rng_200707\\kf.csv';
const vzDoc = 'C:\\Users\\vle2fe\\Documents\\Data\\Rng_200707\\vz.csv';
const nmDocs = 'C:\\Users\\vle2fe\\Documents\\Data\\Rng_200707\\nmDocs';  // NormMaster Documents
const dptFiles = 'C:\\Users\\vle2fe\\Documents\\Data\\Rng_200707\\DPT';  // Spectrum files
const host = 'http://localhost:3000';
// const host = 'https://definma-api.apps.de1.bosch-iot-cloud.com';

// NOTE(review): credentials are hard-coded in this script; consider reading them from the environment instead
const adminAuth = {username: 'admin', password: 'Abc123!#'};
const devicePassword = '2020DeFinMachen!';  // password used for the device users derived from the DPT file names

let data = [];  // metadata contents
let materials = {};
let samples = [];
let normMaster = {};
let sampleDevices = {};

// TODO: BASF twice, BASF as color
// TODO: duplicate kf values
// TODO: conditions
// TODO: comment and reference handling
// TODO: check last color errors (filter out already taken) use location and device for user, upload to BIC

main().catch(err => {  // make failures visible instead of leaving an unhandled rejection
  console.error(err);
  process.exitCode = 1;
});

/**
 * Entry point: runs the import stages selected by the switches below.
 * @returns {Promise<void>}
 */
async function main() {
  // flip these switches to select which import stages run
  const run = {
    materials: false,
    samples: false,
    samplesMeta: true,
    samplesKf: true,
    samplesVz: true,
    dpt: true,
    pdfTest: false
  };

  if (run.materials) {  // materials
    await getNormMaster();
    for (const doc of [metaDoc, kfDoc, vzDoc]) {
      await importCsv(doc);
      await allMaterials();
      await saveMaterials();
    }
  }
  if (run.samples) {  // samples
    sampleDeviceMap();
    if (run.samplesMeta) {
      console.log('-------- META ----------');
      await importCsv(metaDoc);
      await allSamples();
      await saveSamples();
    }
    if (run.samplesKf) {
      console.log('-------- KF ----------');
      await importCsv(kfDoc);
      await allSamples();
      await saveSamples();
      await allKfVz();
    }
    if (run.samplesVz) {
      console.log('-------- VZ ----------');
      await importCsv(vzDoc);
      await allSamples();
      await saveSamples();
      await allKfVz();
    }
  }
  if (run.dpt) {  // DPT
    await allDpts();
  }
  if (run.pdfTest) {  // pdf test
    console.log(await readPdf('N28_BN05-OX013_2016-03-11.pdf'));
  }
}

/**
 * Sends an authenticated GET request to the API as the admin user.
 * @param {string} route - path (including query string) appended to host
 * @returns {Promise<object>} the axios response
 */
function apiGet(route) {
  return axios({method: 'get', url: host + route, auth: adminAuth});
}

/**
 * Sends an authenticated POST request to the API.
 * @param {string} route - path appended to host
 * @param {object} payload - request body
 * @param {{username: string, password: string}} [auth] - credentials, admin by default
 * @returns {Promise<object>} the axios response
 */
function apiPost(route, payload, auth = adminAuth) {
  return axios({method: 'post', url: host + route, auth, data: payload});
}

/**
 * Extracts something printable from an axios error; network errors carry no response.
 * @param {Error} err - error passed to an axios catch handler
 * @returns {*} the response body if there is one, the error message otherwise
 */
function errorDetails(err) {
  return err.response ? err.response.data : err.message;
}

/**
 * Returns the status text of an API error response, if any.
 * @param {Error} err - error passed to an axios catch handler
 * @returns {string|undefined}
 */
function errorStatus(err) {
  return err.response && err.response.data ? err.response.data.status : undefined;
}

/**
 * Credentials for requests concerning a sample: the device user from sampleDevices if one is
 * mapped to the sample number, the admin user otherwise.
 * @param {string} sampleNumber
 * @returns {{username: string, password: string}}
 */
function authFor(sampleNumber) {
  return sampleDevices[sampleNumber]
    ? {username: sampleDevices[sampleNumber], password: devicePassword}
    : adminAuth;
}

/**
 * Looks up the _id of a measurement template by name.
 * @param {Array<object>} templates - response body of /template/measurements
 * @param {string} name - template name
 * @returns {*} the template _id
 * @throws {Error} if no template with that name exists
 */
function templateId(templates, name) {
  const template = templates.find(e => e.name === name);
  if (!template) {
    throw new Error(`measurement template "${name}" not found`);
  }
  return template._id;
}

/**
 * Fetches all samples and maps sample number -> sample _id.
 * @returns {Promise<object>}
 */
async function fetchSampleIds() {
  const res = await apiGet('/samples?status=all');
  const ids = {};
  res.data.forEach(sample => {
    ids[sample.number] = sample._id;
  });
  return ids;
}

/**
 * Reads a win1252 encoded CSV file into the module-level data array (one object per row).
 * If the file has a 'Farbe' column, its value is copied to 'Color' on every row.
 * @param {string} doc - path of the CSV file
 * @returns {Promise<void>} rejects if reading or parsing the file fails
 */
async function importCsv(doc) {
  data = [];
  await new Promise((resolve, reject) => {
    fs.createReadStream(doc)
      .on('error', reject)
      .pipe(iconv.decodeStream('win1252'))
      .pipe(csv())
      .on('error', reject)
      .on('data', row => {
        data.push(row);
      })
      .on('end', () => {
        console.info('CSV file successfully processed');
        if (data.length > 0 && data[0]['Farbe']) {  // fix German column names
          data.forEach(e => {
            e['Color'] = e['Farbe'];
          });
        }
        resolve();
      });
  });
}

/**
 * Uploads every DPT file in dptFiles whose name references a known sample number as a
 * measurement using the 'spectrum' template.
 * @returns {Promise<void>}
 */
async function allDpts() {
  const res = await apiGet('/template/measurements');
  const measurement_template = templateId(res.data, 'spectrum');
  const sampleIds = await fetchSampleIds();
  const dptRegex = /.*?_(.*?)_(\d+|\d+_\d+)\.DPT/;

  for (const fileName of fs.readdirSync(dptFiles)) {
    const regexRes = dptRegex.exec(fileName);
    const sampleId = regexRes ? sampleIds[regexRes[1]] : undefined;
    if (!sampleId) {
      console.log(`Could not find sample for ${fileName} !!!!!!`);
      continue;
    }
    console.log(fileName);  // found matching sample
    const content = fs.readFileSync(path.join(dptFiles, fileName), 'utf-8');
    // blank lines (e.g. after the trailing line break) would otherwise end up as bogus points
    let dpt = content.split(/\r?\n/).filter(line => line !== '').map(line => line.split(','));
    if (dpt.some(point => point[1] > 2)) {  // second column above 2 anywhere: divide the whole column by 100
      dpt = dpt.map(point => [point[0], point[1] / 100]);
    }
    await apiPost('/measurement/new', {sample_id: sampleId, values: {dpt}, measurement_template})
      .catch(err => {
        console.log(fileName);
        console.error(errorDetails(err));
      });
  }
}

/**
 * Uploads the 'KF in Gew%' (with 'Stabwn') and 'VZ (ml/g)' values of the loaded CSV rows as
 * measurements using the 'kf' and 'vz' templates.
 * @returns {Promise<void>}
 */
async function allKfVz() {
  const res = await apiGet('/template/measurements');
  const kf_template = templateId(res.data, 'kf');
  const vz_template = templateId(res.data, 'vz');
  const sampleIds = await fetchSampleIds();

  for (const [index, sample] of data.entries()) {
    console.info(`${index}/${data.length}`);
    const sampleNumber = sample['Sample number'];
    if (sampleNumber === '') {
      continue;
    }
    const auth = authFor(sampleNumber);
    const logFailure = err => {
      console.log(sampleNumber);
      console.error(errorDetails(err));
    };
    if (sample['KF in Gew%']) {
      await apiPost('/measurement/new', {
        sample_id: sampleIds[sampleNumber],
        measurement_template: kf_template,
        values: {
          'weight %': sample['KF in Gew%'],
          'standard deviation': sample['Stabwn']
        }
      }, auth).catch(logFailure);
    }
    if (sample['VZ (ml/g)']) {
      await apiPost('/measurement/new', {
        sample_id: sampleIds[sampleNumber],
        measurement_template: vz_template,
        values: {
          vz: sample['VZ (ml/g)']
        }
      }, auth).catch(logFailure);
    }
  }
}

/**
 * Determines the color of a sample, trying in this order: the material number, the 'Color'
 * column, the color of the already stored main sample (sample number up to the first '_').
 * @param {object} sample - CSV row
 * @param {object} material - material as returned by the API, with a numbers array
 * @param {object} sampleColors - sample number -> color of the samples already stored
 * @returns {string} the color, '' if none could be determined
 */
function resolveSampleColor(sample, material, sampleColors) {
  const wantedNumber = stripSpaces(sample['Material number']);
  if (wantedNumber !== '') {  // TODO: fix because of false material/material number
    const byNumber = material.numbers.find(e => stripSpaces(e.number) === wantedNumber);
    if (byNumber) {
      return byNumber.color;
    }
  }
  if (sample['Color']) {
    let byColor = material.numbers.find(e => e.color.indexOf(trim(sample['Color'])) >= 0);
    if (!byColor && /black/.test(sample['Color'])) {  // special case bk for black
      byColor = material.numbers.find(e => e.color.toLowerCase().indexOf('bk') >= 0);
      if (!byColor) {  // try German word
        byColor = material.numbers.find(e => e.color.toLowerCase().indexOf('schwarz') >= 0);
      }
    }
    if (byColor) {
      return byColor.color;
    }
    // previously this case threw a TypeError and aborted the whole import
    console.warn(`No color of ${sample['Material name']} matches "${sample['Color']}" (sample ${sample['Sample number']})`);
  }
  // derive color from main sample for kf/vz
  return sampleColors[sample['Sample number'].split('_')[0]] || '';
}

/**
 * Builds the module-level samples array from the loaded CSV rows. Rows with an empty sample
 * number or whose material is not known to the API are skipped.
 * @returns {Promise<void>}
 */
async function allSamples() {
  samples = [];
  let res = await apiGet('/materials?status=all');
  const dbMaterials = {};
  res.data.forEach(m => {
    dbMaterials[m.name] = m;
  });
  res = await apiGet('/samples?status=all');
  const sampleColors = {};
  res.data.forEach(sample => {
    sampleColors[sample.number] = sample.color;
  });

  for (const [index, sample] of data.entries()) {
    console.info(`${index}/${data.length}`);
    if (sample['Sample number'] === '') {  // TODO: what about samples without color
      continue;
    }
    if (sample['Supplier'] === '') {  // empty supplier fields
      sample['Supplier'] = 'unknown';
    }
    if (sample['Granulate/Part'] === '') {  // empty type fields
      sample['Granulate/Part'] = 'unknown';
    }
    const material = dbMaterials[trim(sample['Material name'])];
    if (!material) {  // could not find material, skipping sample
      continue;
    }
    console.log(sample['Material name']);
    console.log(material._id);
    const entry = {
      number: sample['Sample number'],
      type: sample['Granulate/Part'],
      batch: sample['Charge/batch granulate/part'] || '',
      material_id: material._id,
      notes: {
        comment: sample['Comments']
      }
    };
    entry.color = resolveSampleColor(sample, material, sampleColors);
    samples.push(entry);
  }
}

/**
 * Posts every entry of the module-level samples array to the API. 'Sample number already taken'
 * responses are ignored, every other failure is logged.
 * @returns {Promise<void>}
 */
async function saveSamples() {
  for (const [i, sample] of samples.entries()) {
    console.info(`${i}/${samples.length}`);
    await apiPost('/sample/new', sample, authFor(sample.number)).catch(err => {
      if (errorStatus(err) !== 'Sample number already taken') {
        console.log(sample);
        console.error(errorDetails(err));
      }
    });
  }
  console.info('saved all samples');
}

/**
 * Builds the module-level materials object (keyed by material name) from the loaded CSV rows,
 * collecting the color/number combinations found for each material.
 * @returns {Promise<void>}
 */
async function allMaterials() {
  materials = {};
  for (const [index, sample] of data.entries()) {
    if (!sample['Sample number']) {
      continue;
    }
    if (sample['Supplier'] === '') {  // empty supplier fields
      sample['Supplier'] = 'unknown';
    }
    if (!sample['Material']) {  // column Material is named Plastic in VZ metadata
      sample['Material'] = sample['Plastic'];
    }
    if (sample['Material name'] === '') {  // empty name fields, needs the Plastic fallback above to be done first
      sample['Material name'] = sample['Material'];
    }
    sample['Material name'] = trim(sample['Material name']);

    const name = sample['Material name'];
    // stored colors are trimmed and stored numbers are space-stripped, so compare in the same form
    const color = trim(sample['Color']);
    const number = stripSpaces(sample['Material number']);

    if (!Object.prototype.hasOwnProperty.call(materials, name)) {  // new material
      console.info(`${index}/${data.length} ${name}`);
      const reinforcement = sample['Reinforcing material'];
      const percentage = regex => {  // number following the given prefix, 0 if absent
        const found = regex.exec(reinforcement);
        return found ? found[1] : 0;
      };
      materials[name] = {
        name,
        supplier: trim(sample['Supplier']),
        group: trim(sample['Material']),
        mineral: percentage(/M(\d+)/),
        glass_fiber: percentage(/GF(\d+)/),
        carbon_fiber: percentage(/CF(\d+)/)
      };
      materials[name].numbers = await numbersFetch(sample);
      console.log(materials[name]);
      continue;
    }

    // material already found at least once
    const numbers = materials[name].numbers;
    if (number !== '') {
      if (!numbers.find(e => e.number === number)) {  // new material number
        const colorOnly = numbers.find(e => e.color === color && e.number === '');
        if (colorOnly) {  // color already in list, only number missing
          colorOnly.number = number;
        } else {
          numbers.push({color, number});
        }
      }
    } else if (color !== '') {
      if (!numbers.find(e => e.color === color)) {  // new material color
        numbers.push({color, number: ''});
      }
    }
  }
}

/**
 * Posts every entry of the module-level materials object to the API. 'Material name already
 * taken' responses are ignored, every other failure is logged.
 * @returns {Promise<void>}
 */
async function saveMaterials() {
  const mKeys = Object.keys(materials);
  for (const [i, key] of mKeys.entries()) {
    console.info(`${i}/${mKeys.length}`);
    await apiPost('/material/new', materials[key]).catch(err => {
      if (errorStatus(err) !== 'Material name already taken') {
        console.info(materials[key]);
        console.error(errorDetails(err));
      }
    });
  }
  console.info('saved all materials');
}

/**
 * Collects the color/number combinations of a material from the NormMaster documents found in
 * nmDocs, falling back to the color and material number of the CSV row itself.
 * @param {object} sample - CSV row
 * @returns {Promise<Array<{color: string, number: string}>>}
 */
async function numbersFetch(sample) {
  const number = stripSpaces(sample['Material number']);  // same form as the numbers returned by readPdf
  const color = trim(sample['Color']);
  let nm = [];
  let res = [];
  if (sample['Material number']) {  // sample has a material number
    nm = normMaster[number] ? [normMaster[number]] : [];
  } else {  // try finding via material name
    const nameSpaceless = stripSpaces(sample['Material name']);
    nm = Object.keys(normMaster)
      .filter(e => normMaster[e].nameSpaceless === nameSpaceless)
      .map(e => normMaster[e]);
  }

  // numeric index on purpose: with for...in the index is a string and i + 1 would concatenate
  for (let i = 0; i < nm.length; i++) {
    // downloading missing documents is currently disabled:
    // if (!fs.readdirSync(nmDocs).find(e => e.indexOf(nm[i].doc.replace(/ /g, '_')) >= 0)) {  // document not loaded
    //   await getNormMasterDoc(nm[i].url.replace(/ /g, '%20'));
    // }
    // if (!fs.readdirSync(nmDocs).find(e => e.indexOf(nm[i].doc.replace(/ /g, '_')) >= 0)) {  // document not loaded
    //   console.info('Retrying download...');
    //   await getNormMasterDoc(nm[i].url.replace(/ /g, '%20'), 2.2);
    // }
    // if (!fs.readdirSync(nmDocs).find(e => e.indexOf(nm[i].doc.replace(/ /g, '_')) >= 0)) {  // document not loaded
    //   console.info('Retrying download again...');
    //   await getNormMasterDoc(nm[i].url.replace(/ /g, '%20'), 5);
    // }
    const docName = nm[i].doc.replace(/ /g, '_');
    const file = fs.readdirSync(nmDocs).find(e => e.indexOf(docName) >= 0);
    if (file) {  // document loaded
      res = await readPdf(file);
    }
    if (res.length > 0) {  // got results
      break;
    } else if (i + 1 >= nm.length) {  // last candidate and still nothing
      console.error('Download failed!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
    }
  }

  if (res.length === 0) {  // no results
    if (sample['Color'] || sample['Material number']) {
      return [{color, number}];
    }
    return [];
  }
  if (sample['Material number'] && !res.find(e => e.number === number)) {
    // sometimes norm master does not include sample number even if listed
    res.push({color, number});
  }
  return res;
}

/**
 * Loads the NormMaster index into the module-level normMaster object, either from the cached
 * ./data_import/normMaster.json or, with fetchAgain, from the NormMaster search page (the result
 * is then written back to the cache file).
 * @param {boolean} [fetchAgain=false] - fetch from the web instead of reading the cache
 * @returns {Promise<void>}
 * @throws {Error} with fetchAgain, as long as the row regex has not been restored (see below)
 */
async function getNormMaster(fetchAgain = false) {
  if (!fetchAgain) {
    normMaster = JSON.parse(fs.readFileSync('./data_import/normMaster.json', 'utf-8'));
    return;
  }

  // NOTE(review): the row pattern is garbled in this copy of the file - the HTML tags inside it
  // were stripped and the commented-out variant got fused with the active one (remains below).
  // Its capture groups must yield name, number, url and doc, in that order. Restore the original
  // pattern here before calling getNormMaster(true).
  // const regex = /.*?.*?<\/span>(.*?)<\/td>(\d+)<\/td>.*?.*?.*?<\/span>(.*?)<\/td>(\d+)<\/td>40.*?(.*?)<\/td>/gm;  // only valid materials
  const regex = null;
  if (!regex) {
    throw new Error('getNormMaster: the NormMaster row regex is missing, restore it before fetching again');
  }

  console.info('fetching norm master...');
  const res = await axios({
    method: 'get',
    url: 'http://rb-normen.bosch.com/cgi-bin/searchRBNorm4TradeName'
  });
  console.info('finding documents...');
  let match = regex.exec(res.data);
  while (match) {
    normMaster[match[2]] = {
      name: match[1],
      nameSpaceless: stripSpaces(match[1]),
      number: match[2],
      url: match[3],
      doc: match[4]
    };
    match = regex.exec(res.data);
  }
  fs.writeFileSync('./data_import/normMaster.json', JSON.stringify(normMaster));
}

/**
 * Opens a NormMaster document URL in Chrome with the download directory set to nmDocs, then
 * waits a fixed time for the download before closing the browser.
 * @param {string} url - document URL
 * @param {number} [timing=1] - factor applied to the waiting times
 * @returns {Promise<void>} rejects if the browser cannot be started or closed
 */
async function getNormMasterDoc(url, timing = 1) {
  console.info(url);
  const options = new chrome.Options();
  options.setUserPreferences({
    "download.default_directory": nmDocs,
    "download.prompt_for_download": false,
    "download.directory_upgrade": true,
    "plugins.always_open_pdf_externally": true
  });
  const driver = await new Builder().forBrowser('chrome').setChromeOptions(options).build();
  let timeout = 7000 * timing;
  try {
    await driver.get(url);
    if (await driver.getCurrentUrl() !== 'https://rb-wam-saml.bosch.com/tfim/sps/normmaster/saml20/login') {  // got document selection page
      timeout = 11000 * timing;
      await driver.executeScript('Array.prototype.slice.call(document.querySelectorAll(\'.functionlink\')).filter(e => e.innerText === \'English\')[0].click()')
        .catch(() => {  // no English link to click, nothing to wait for
          timeout = 0;
        });
    }
  } catch (err) {
    // keep going so the browser still gets closed; the caller checks nmDocs for the file
    console.error(err);
  } finally {
    await new Promise(resolve => setTimeout(resolve, timeout));  // wait until download is finished
    await driver.quit();
  }
}

/**
 * Converts the text rows collected by readPdf into color/number objects. Only rows starting with
 * ten digits (spaces ignored) are kept; cells are separated by '$'.
 * @param {Array<string>} rows
 * @returns {Array<{color: string, number: string}>}
 */
function tableRowsToNumbers(rows) {
  return rows
    .filter(e => /^\d{10}/m.test(stripSpaces(e)))  // filter non-table rows
    .map(e => {
      const cells = e.split('$');
      return {color: trim(cells[3]), number: stripSpaces(cells[0])};
    });
}

/**
 * Extracts the material number / color table from a NormMaster PDF in nmDocs.
 * @param {string} file - file name inside nmDocs
 * @returns {Promise<Array<{color: string, number: string}>>} the table rows; a parser error is
 *   logged and treated like the end of the document
 */
function readPdf(file) {
  return new Promise(resolve => {
    const countdown = 100;  // value for text timeout
    let table = 0;  // > 0 when in correct table area
    let rows = [];  // found table rows
    let lastY = 0;  // y of last row
    let lastX = 0;  // right x of last item
    let lastText = '';  // text of last item
    let lastLastText = '';  // text of last last item

    new pdfReader.PdfReader().parseFileItems(path.join(nmDocs, file), (err, item) => {
      if (err) {
        console.error(err);
      }
      if (item && item.text) {
        const text = item.text.toLowerCase();
        // the table header may be split over up to three items ('designatiom' covers a misspelled header)
        const header = stripSpaces(lastLastText + lastText + item.text).toLowerCase();
        if (header.indexOf('colordesignationsupplier') >= 0 || header.indexOf('colordesignatiomsupplier') >= 0) {  // table area starts
          table = countdown;
        }
        if (table > 0) {
          if (item.y - lastY > 0.8 && Math.abs(item.x - lastX) > 5) {  // new row
            lastY = item.y;
            rows.push(item.text);
          } else if (rows.length > 0) {  // still the same row
            rows[rows.length - 1] += (item.x - lastX > 1.09 ? '$' : '') + item.text;  // push to row, detect if still same cell
          }
          lastX = (item.w * 0.055) + item.x;
          if (/\d \d\d\d \d\d\d \d\d\d/.test(item.text)) {  // another material number, keep the table area alive
            table = countdown;
          }
          table--;
          if (table <= 0 || text.indexOf('release document') >= 0 || text.indexOf('normative references') >= 0) {  // table area ended
            table = -1;
            resolve(tableRowsToNumbers(rows));
          }
        }
        lastLastText = lastText;
        lastText = item.text;
      }
      if (!item && table !== -1) {  // document ended
        resolve(tableRowsToNumbers(rows));
      }
    });
  });
}

/**
 * Fills the module-level sampleDevices map (sample number -> device user name) from the DPT file
 * names, which start with <device>_<sample number>_. The prefix 'plastics' maps to 'rng01', any
 * other prefix is used in lower case.
 */
function sampleDeviceMap() {
  const regex = /(.*?)_(.*?)_(\d+|[^_]+_\d+)\.DPT/;
  for (const fileName of fs.readdirSync(dptFiles)) {
    const regexRes = regex.exec(fileName);
    if (regexRes) {  // found matching sample
      sampleDevices[regexRes[2]] = regexRes[1] === 'plastics' ? 'rng01' : regexRes[1].toLowerCase();
    }
  }
}

/**
 * Removes all space characters from a string.
 * @param {string} [s]
 * @returns {string} the string without spaces, '' for empty or missing input
 */
function stripSpaces(s) {
  return s ? s.replace(/ /g, '') : '';
}

/**
 * Removes leading and trailing whitespace (of every line) from a string.
 * @param {string} [s]
 * @returns {string} the trimmed string, '' for empty or missing input
 */
function trim(s) {
  return s ? s.replace(/(^\s+|\s+$)/gm, '') : '';
}