// Source listing metadata: JavaScript, 488 lines, 17 KiB
const fs = require('fs');
const path = require('path');

const axios = require('axios');
const chrome = require('selenium-webdriver/chrome');
const csv = require('csv-parser');
const iconv = require('iconv-lite');
const pdfReader = require('pdfreader');
const {Builder} = require('selenium-webdriver');
|
|
|
|
// Input locations (absolute Windows paths).
const metadata = 'C:\\Users\\vle2fe\\Documents\\Data\\Rng_200622\\VZ.csv'; // metadata file
const nmDocs = 'C:\\Users\\vle2fe\\Documents\\Data\\Rng_200622\\nmDocs'; // NormMaster Documents
const dptFiles = 'C:\\Users\\vle2fe\\Documents\\Data\\Rng_200622\\DPT'; // Spectrum files

// Shared import state. The containers are only ever mutated in place, so they are const.
const data = []; // metadata contents, one object per CSV row
const materials = {}; // collected materials, keyed by trimmed material name
const samples = []; // sample payloads waiting to be posted
let normMaster = {}; // NormMaster entries keyed by material number; replaced when the JSON cache is loaded
|
|
|
|
// TODO: integrate measurement device information from the DPT file names, using different users
// TODO: use supplier "other" for samples without a supplier
// TODO: BASF appears twice; BASF also appears as a color
// TODO: trim color names
// TODO: handle duplicate KF values
|
|
|
|
// Start the import. Report a failure and mark the process as failed instead of
// leaving the returned promise's rejection unhandled.
main().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
|
|
|
|
/**
 * Runs exactly one import step.
 *
 * The step used to be chosen by editing `if (0)` / `else if (1)` toggles in the
 * source. It is now chosen by name, taken from the first command line argument,
 * and defaults to 'dpt' (the step that was switched on).
 *
 * @param {string} [mode] one of 'materials', 'samples', 'dpt', 'kfvz', 'pdf'
 * @returns {Promise<void>}
 * @throws {Error} when mode is not a known step name
 */
async function main(mode = process.argv[2] || 'dpt') {
  const steps = {
    materials: async () => {
      await getNormMaster();
      await importCsv();
      await allMaterials();
      fs.writeFileSync('./data_import/materials.json', JSON.stringify(materials));
      await saveMaterials();
    },
    samples: async () => {
      await importCsv();
      await allSamples();
      await saveSamples();
    },
    dpt: async () => {
      await allDpts();
    },
    kfvz: async () => { // KF/VZ
      await importCsv();
      await allKfVz();
    },
    pdf: async () => { // pdf test
      console.log(await readPdf('N28_BN22-O010_2018-03-08.pdf'));
    }
  };

  if (!Object.prototype.hasOwnProperty.call(steps, mode)) {
    throw new Error(`Unknown import step "${mode}", expected one of: ${Object.keys(steps).join(', ')}`);
  }
  await steps[mode]();
}
|
|
|
|
/**
 * Reads the metadata CSV (decoded as win1252) and appends every row to `data`.
 *
 * Errors are not forwarded along `pipe()`, so each stage gets its own handler.
 * Without them a missing or unreadable file would leave the promise pending or
 * kill the process with an unhandled 'error' event.
 *
 * @returns {Promise<void>} resolves once the whole file has been parsed
 */
async function importCsv() {
  await new Promise((resolve, reject) => {
    fs.createReadStream(metadata)
      .on('error', reject)
      .pipe(iconv.decodeStream('win1252'))
      .on('error', reject)
      .pipe(csv())
      .on('error', reject)
      .on('data', row => {
        data.push(row);
      })
      .on('end', () => {
        console.info('CSV file successfully processed');
        resolve();
      });
  });
}
|
|
|
|
/**
 * Posts every DPT spectrum file found in `dptFiles` as a 'spectrum' measurement.
 *
 * A file is uploaded only when its name matches `<prefix>_<sample number>_<n>.DPT`
 * (or `..._<n>_<m>.DPT`) and the sample number exists on the server. A failed
 * upload is logged and the loop continues with the next file.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the server has no measurement template named 'spectrum'
 */
async function allDpts() {
  const auth = {username: 'admin', password: 'Abc123!#'}; // NOTE(review): credentials are hard-coded

  let res = await axios({
    method: 'get',
    url: 'http://localhost:3000/template/measurements',
    auth
  });
  const spectrumTemplate = res.data.find(e => e.name === 'spectrum');
  if (!spectrumTemplate) {
    throw new Error('Measurement template "spectrum" not found');
  }
  const measurement_template = spectrumTemplate._id;

  res = await axios({
    method: 'get',
    url: 'http://localhost:3000/samples?status=all',
    auth
  });
  const sampleIds = {}; // sample number -> sample _id
  res.data.forEach(sample => {
    sampleIds[sample.number] = sample._id;
  });

  const regex = /.*?_(.*?)_(\d+|\d+_\d+)\.DPT/; // group 1: sample number; the dot is escaped so only a real ".DPT" matches
  for (const fileName of fs.readdirSync(dptFiles)) {
    const regexRes = regex.exec(fileName);
    if (!regexRes || !sampleIds[regexRes[1]]) { // no matching sample
      continue;
    }
    console.log(fileName);
    const content = fs.readFileSync(path.join(dptFiles, fileName), 'utf-8');
    const payload = {
      sample_id: sampleIds[regexRes[1]],
      values: {
        // NOTE(review): a trailing line break in the file produces a final [''] row - confirm the server expects this
        dpt: content.split('\r\n').map(e => e.split(','))
      },
      measurement_template
    };
    await axios({
      method: 'post',
      url: 'http://localhost:3000/measurement/new',
      auth,
      data: payload
    }).catch(err => {
      console.log(fileName);
      // err.response is undefined when the request never got a response (e.g. connection refused)
      console.error(err.response ? err.response.data : err.message);
    });
  }
}
|
|
|
|
/**
 * Posts the KF and VZ values of every imported CSV row as measurements.
 *
 * Rows without a sample number are skipped. A row yields a 'kf' measurement when
 * 'KF in Gew%' is set and a 'vz' measurement when 'VZ (ml/g)' is set. A failed
 * upload is logged and the loop continues.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the server has no measurement template named 'kf' or 'vz'
 */
async function allKfVz() {
  const auth = {username: 'admin', password: 'Abc123!#'}; // NOTE(review): credentials are hard-coded

  let res = await axios({
    method: 'get',
    url: 'http://localhost:3000/template/measurements',
    auth
  });
  const templates = res.data;
  const templateId = name => {
    const template = templates.find(e => e.name === name);
    if (!template) {
      throw new Error(`Measurement template "${name}" not found`);
    }
    return template._id;
  };
  const kf_template = templateId('kf');
  const vz_template = templateId('vz');

  res = await axios({
    method: 'get',
    url: 'http://localhost:3000/samples?status=all',
    auth
  });
  const sampleIds = {}; // sample number -> sample _id
  res.data.forEach(sample => {
    sampleIds[sample.number] = sample._id;
  });

  // Posts one measurement; logs the sample number and the failure reason on error.
  const postMeasurement = (sampleNumber, measurement_template, values) => axios({
    method: 'post',
    url: 'http://localhost:3000/measurement/new',
    auth,
    data: {
      sample_id: sampleIds[sampleNumber],
      measurement_template,
      values
    }
  }).catch(err => {
    console.log(sampleNumber);
    // err.response is undefined when the request never got a response (e.g. connection refused)
    console.error(err.response ? err.response.data : err.message);
  });

  for (const [index, sample] of data.entries()) {
    console.info(`${index}/${data.length}`);
    if (sample['Sample number'] === '') {
      continue;
    }
    if (sample['KF in Gew%']) {
      await postMeasurement(sample['Sample number'], kf_template, {
        'weight %': sample['KF in Gew%'],
        'standard deviation': sample['Stabwn']
      });
    }
    if (sample['VZ (ml/g)']) {
      await postMeasurement(sample['Sample number'], vz_template, {
        vz: sample['VZ (ml/g)']
      });
    }
  }
}
|
|
|
|
/**
 * Builds the sample payloads in `samples` from the imported CSV rows.
 *
 * A row is used only when it has a sample number, supplier and type, its material
 * exists on the server, and a color can be determined. The color comes from, in order:
 *   1. the material's entry with the row's material number (spaces ignored),
 *   2. the material's entry whose color contains the row's color text,
 *   3. the color of the already stored main sample (number before the first '_').
 * Rows without any color information are skipped.
 *
 * @returns {Promise<void>}
 */
async function allSamples() {
  const auth = {username: 'admin', password: 'Abc123!#'}; // NOTE(review): credentials are hard-coded

  let res = await axios({
    method: 'get',
    url: 'http://localhost:3000/materials?status=all',
    auth
  });
  const dbMaterials = {}; // material name -> material
  res.data.forEach(m => {
    dbMaterials[m.name] = m;
  });

  res = await axios({
    method: 'get',
    url: 'http://localhost:3000/samples?status=all',
    auth
  });
  const sampleColors = {}; // sample number -> color
  res.data.forEach(sample => {
    sampleColors[sample.number] = sample.color;
  });

  for (const [index, sample] of data.entries()) {
    console.info(`${index}/${data.length}`);
    if (sample['Sample number'] === '' || sample['Supplier'] === '' || sample['Granulate/Part'] === '') { // TODO: wait for decision about samples without suppliers/color/type
      continue;
    }
    const material = dbMaterials[trim(sample['Material name'])];
    if (!material) { // could not find material, skipping sample
      continue;
    }
    console.log(sample['Material name']);
    console.log(material._id);

    // Compare numbers without spaces: the material import stores them space-stripped,
    // while the CSV may contain them with spaces.
    const number = stripSpaces(sample['Material number']);
    let colorEntry; // TODO: fix because of false material/material number
    if (number !== '') {
      colorEntry = material.numbers.find(e => stripSpaces(e.number) === number);
    }
    if (!colorEntry && sample['Color']) {
      // may find nothing; then fall through to the main sample instead of crashing
      colorEntry = material.numbers.find(e => e.color && e.color.indexOf(sample['Color']) >= 0);
    }

    const mainSampleColor = sampleColors[sample['Sample number'].split('_')[0]];
    let color;
    if (colorEntry) {
      color = colorEntry.color;
    }
    else if (mainSampleColor) { // derive color from main sample for kf/vz
      color = mainSampleColor;
    }
    else { // TODO: no color information at all
      continue;
    }

    samples.push({
      number: sample['Sample number'],
      type: sample['Granulate/Part'],
      batch: sample['Charge/batch granulate/part'] || '',
      material_id: material._id,
      notes: {
        comment: sample['Comments']
      },
      color
    });
  }
}
|
|
|
|
/**
 * Posts every entry of `samples` to the server, one request at a time.
 * A failed request is logged together with its payload and does not stop the loop.
 *
 * @returns {Promise<void>}
 */
async function saveSamples() {
  for (const [i, sample] of samples.entries()) {
    console.info(`${i}/${samples.length}`);
    await axios({
      method: 'post',
      url: 'http://localhost:3000/sample/new',
      auth: {
        username: 'admin',
        password: 'Abc123!#'
      },
      data: sample
    }).catch(err => {
      console.log(sample);
      // err.response is undefined when the request never got a response (e.g. connection refused)
      console.error(err.response ? err.response.data : err.message);
    });
  }
  console.info('saved all samples');
}
|
|
|
|
/**
 * Collects the distinct materials of the imported CSV rows into `materials`.
 *
 * The first row of a material creates the entry and fetches its color/number list
 * via numbersFetch(). Later rows of the same material only extend that list with a
 * new material number or a new color. Rows without a sample number or supplier are
 * skipped. The row's 'Material name' is trimmed in place.
 *
 * @returns {Promise<void>}
 */
async function allMaterials() {
  for (const [index, sample] of data.entries()) {
    if (sample['Sample number'] === '' || sample['Supplier'] === '') { // TODO: wait for decision about supplierless samples
      continue;
    }
    sample['Material name'] = trim(sample['Material name']);
    const name = sample['Material name'];

    if (Object.prototype.hasOwnProperty.call(materials, name)) { // material already found at least once
      const numbers = materials[name].numbers;
      if (sample['Material number'] !== '') {
        const number = stripSpaces(sample['Material number']);
        if (!numbers.find(e => e.number === number)) { // new material number
          const colorOnly = numbers.find(e => e.color === sample['Color'] && e.number === '');
          if (colorOnly) { // color already in list, only number missing
            colorOnly.number = number;
          }
          else {
            numbers.push({color: sample['Color'], number});
          }
        }
      }
      else if (sample['Color'] !== '') {
        // Colors are stored unmodified, so compare unmodified as well. Comparing against the
        // space-stripped color never matched colors containing a space and duplicated them.
        if (!numbers.find(e => e.color === sample['Color'])) { // new material color
          numbers.push({color: sample['Color'], number: ''});
        }
      }
    }
    else { // new material
      console.info(`${index}/${data.length} ${name}`);
      // digits following the given marker in 'Reinforcing material', 0 when the marker is absent
      const amount = regex => {
        const tmp = regex.exec(sample['Reinforcing material']);
        return tmp ? tmp[1] : 0;
      };
      const material = {
        name,
        supplier: sample['Supplier'],
        group: sample['Material'],
        mineral: amount(/M(\d+)/),
        glass_fiber: amount(/GF(\d+)/),
        carbon_fiber: amount(/CF(\d+)/)
      };
      materials[name] = material;
      material.numbers = await numbersFetch(sample);
      console.log(material);
    }
  }
}
|
|
|
|
/**
 * Posts every collected material to the server, one request at a time.
 * A failed request is logged together with its payload and does not stop the loop.
 *
 * @returns {Promise<void>}
 */
async function saveMaterials() {
  for (const material of Object.values(materials)) {
    await axios({
      method: 'post',
      url: 'http://localhost:3000/material/new',
      auth: {
        username: 'admin',
        password: 'Abc123!#'
      },
      data: material
    }).catch(err => {
      console.log(material);
      // err.response is undefined when the request never got a response (e.g. connection refused)
      console.error(err.response ? err.response.data : err.message);
    });
  }
  console.info('saved all materials');
}
|
|
|
|
/**
 * Determines the color/material-number list for the material of a CSV row.
 *
 * NormMaster entries are looked up by the row's material number or, when the row
 * has none, by its material name (spaces ignored). The first entry whose document
 * is present in `nmDocs` and yields rows from readPdf() supplies the list. The
 * row's own color/number is added when the list does not contain that number.
 * Material numbers are returned without spaces, matching readPdf() and allMaterials().
 *
 * @param {Object} sample CSV row; reads 'Material number', 'Material name' and 'Color'
 * @returns {Promise<Array<{color: string, number: string}>>}
 */
async function numbersFetch(sample) {
  const number = stripSpaces(sample['Material number']);
  const color = sample['Color'];

  let nm;
  if (sample['Material number']) { // sample has a material number
    nm = normMaster[number] ? [normMaster[number]] : [];
  }
  else { // try finding via material name
    const nameSpaceless = stripSpaces(sample['Material name']);
    nm = Object.values(normMaster).filter(e => e.nameSpaceless === nameSpaceless);
  }

  let res = [];
  if (nm.length > 0) {
    const loadedDocs = fs.readdirSync(nmDocs); // read once; nothing is downloaded inside the loop
    for (let i = 0; i < nm.length; i++) { // numeric index: with a for...in string index, i + 1 concatenated
      const docName = nm[i].doc.replace(/ /g, '_');
      const file = loadedDocs.find(e => e.indexOf(docName) >= 0);
      if (file) { // document loaded
        res = await readPdf(file);
      }
      if (res.length > 0) { // got results
        break;
      }
      if (i + 1 >= nm.length) { // last candidate and still nothing
        console.error('Download failed!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
      }
    }
  }

  const hasOwnInfo = color !== '' || number !== '';
  if (res.length === 0) { // no results
    return hasOwnInfo ? [{color, number}] : [];
  }
  // sometimes norm master does not include sample number even if listed
  if (hasOwnInfo && !res.find(e => e.number === number)) {
    res.push({color, number});
  }
  return res;
}
|
|
|
|
/**
 * Fills `normMaster`, either from the JSON cache or by fetching the NormMaster
 * trade name list again.
 *
 * When fetching, every table row matched by the regex is stored under its
 * material number and the result is written back to the cache file.
 *
 * @param {boolean} [fetchAgain=false] true to download the list instead of reading the cache
 * @returns {Promise<void>}
 */
async function getNormMaster(fetchAgain = false) {
  const cacheFile = './data_import/normMaster.json';

  if (!fetchAgain) {
    // the encoding belongs to readFileSync; it used to be passed to JSON.parse as a (ignored) reviver
    normMaster = JSON.parse(fs.readFileSync(cacheFile, 'utf-8'));
    return;
  }

  console.info('fetching norm master...');
  const res = await axios({
    method: 'get',
    url: 'http://rb-normen.bosch.com/cgi-bin/searchRBNorm4TradeName'
  });

  console.info('finding documents...');
  // variant without the status column check:
  // /<tr>.*?<td>.*?<\/span>(.*?)<\/td><td>(\d+)<\/td>.*?<a href="(.*?)"/gm
  const regex = /<tr>.*?<td>.*?<\/span>(.*?)<\/td><td>(\d+)<\/td><td>40.*?<a href="(.*?)".*?<\/a>(.*?)<\/td>/gm; // only valid materials
  let match;
  while ((match = regex.exec(res.data)) !== null) {
    normMaster[match[2]] = {
      name: match[1],
      nameSpaceless: stripSpaces(match[1]),
      number: match[2],
      url: match[3],
      doc: match[4]
    };
  }
  fs.writeFileSync(cacheFile, JSON.stringify(normMaster));
}
|
|
|
|
/**
 * Opens a NormMaster document URL in Chrome so that the PDF is downloaded into `nmDocs`.
 *
 * The browser stays open for a fixed time to let the download finish and is then
 * closed. When the page is not the login page, the 'English' link on it is clicked;
 * if that click fails the wait is skipped.
 *
 * A navigation error is logged and the function still waits, closes the browser
 * and resolves, so callers can simply retry. A failure to start or quit the
 * browser rejects the returned promise.
 *
 * @param {string} url document URL
 * @param {number} [timing=1] factor applied to the wait time
 * @returns {Promise<void>}
 */
async function getNormMasterDoc(url, timing = 1) {
  console.log(url);
  const options = new chrome.Options();
  options.setUserPreferences({
    "download.default_directory": nmDocs,
    "download.prompt_for_download": false,
    "download.directory_upgrade": true,
    "plugins.always_open_pdf_externally": true
  });
  const driver = await new Builder().forBrowser('chrome').setChromeOptions(options).build();
  let timeout = 7000 * timing;
  try {
    await driver.get(url);
    if (await driver.getCurrentUrl() !== 'https://rb-wam-saml.bosch.com/tfim/sps/normmaster/saml20/login') { // got document selection page
      timeout = 11000 * timing;
      await driver.executeScript('Array.prototype.slice.call(document.querySelectorAll(\'.functionlink\')).filter(e => e.innerText === \'English\')[0].click()').catch(() => {timeout = 0; });
    }
  }
  catch (err) {
    console.error(err);
  }
  finally {
    await new Promise(resolve => setTimeout(resolve, timeout)); // wait until download is finished
    await driver.quit();
  }
}
|
|
|
|
/**
 * Extracts the color/material-number table from a NormMaster PDF in `nmDocs`.
 *
 * Text items are read in order. The table area starts when the last three items
 * together read "color designation supplier" (or the "designatiom" variant). It
 * ends after `countdown` items without a material number, at 'release document' /
 * 'normative references', or at the end of the document. Items are joined into
 * rows with '$' between cells. Only rows starting with ten digits are kept.
 *
 * A parser error is logged. The promise then still resolves with the rows found
 * so far (usually none), it never rejects.
 *
 * @param {string} file file name inside `nmDocs`
 * @returns {Promise<Array<{color: string, number: string}>>} number is the first cell without spaces, color the fourth cell
 */
function readPdf(file) {
  return new Promise(resolve => {
    const countdown = 100; // value for text timeout
    let table = 0; // > 0 when in correct table area
    let rows = []; // found table rows
    let lastY = 0; // y of last row
    let lastX = 0; // right x of last item
    let lastText = ''; // text of last item
    let lastLastText = ''; // text of last last item

    // Keeps only real table rows and resolves with their color/number pairs.
    const finish = () => {
      rows = rows.filter(e => /^\d{10}/m.test(stripSpaces(e))); // filter non-table rows
      resolve(rows.map(e => {
        const cells = e.split('$');
        return {color: cells[3], number: stripSpaces(cells[0])};
      }));
    };

    new pdfReader.PdfReader().parseFileItems(path.join(nmDocs, file), (err, item) => {
      if (err) { // no item follows, so the end-of-document branch below resolves
        console.error(err);
      }
      if (item && item.text) {
        const recentText = stripSpaces(lastLastText + lastText + item.text).toLowerCase();
        if (recentText.indexOf('colordesignationsupplier') >= 0 || recentText.indexOf('colordesignatiomsupplier') >= 0) { // table area starts
          table = countdown;
        }
        if (table > 0) {
          if (item.y - lastY > 0.8 && Math.abs(item.x - lastX) > 5) { // new row
            lastY = item.y;
            rows.push(item.text);
          }
          else if (rows.length > 0) { // still the same row
            rows[rows.length - 1] += (item.x - lastX > 1.1 ? '$' : '') + item.text; // push to row, detect if still same cell
          }
          // else: no row started yet, the text is dropped (it used to be written to rows[-1])
          lastX = (item.w * 0.055) + item.x;

          if (/\d \d\d\d \d\d\d \d\d\d/.test(item.text)) { // material number seen, restart the countdown
            table = countdown;
          }
          table--;
          const text = item.text.toLowerCase();
          if (table <= 0 || text.indexOf('release document') >= 0 || text.indexOf('normative references') >= 0) { // table area ended
            table = -1;
            finish();
          }
        }
        lastLastText = lastText;
        lastText = item.text;
      }
      if (!item && table !== -1) { // document ended
        finish();
      }
    });
  });
}
|
|
|
|
/**
 * Removes every space character (U+0020) from a string.
 *
 * @param {string} [s] input text
 * @returns {string} s without spaces, or '' when s is empty, null or undefined
 */
function stripSpaces(s) {
  if (!s) {
    return '';
  }
  return s.split(' ').join('');
}
|
|
|
|
/**
 * Removes leading and trailing whitespace from every line of a string.
 *
 * A missing value (undefined, null, '') yields '' instead of throwing, matching
 * stripSpaces(), so a CSV row without the column does not abort the import.
 *
 * @param {string} [s] input text
 * @returns {string} the trimmed text
 */
function trim(s) {
  return s ? s.replace(/(^\s+|\s+$)/gm, '') : '';
}