2020-06-25 10:44:55 +02:00
const csv = require ( 'csv-parser' ) ;
const fs = require ( 'fs' ) ;
const axios = require ( 'axios' ) ;
const { Builder } = require ( 'selenium-webdriver' ) ;
const chrome = require ( 'selenium-webdriver/chrome' ) ;
const pdfReader = require ( 'pdfreader' ) ;
const iconv = require ( 'iconv-lite' ) ;
// Input locations. These are absolute Windows paths and must be adapted per machine.
const metadata = 'C:\\Users\\vle2fe\\Documents\\Data\\Rng_200622\\VZ.csv' ; // CSV file with the sample metadata, read by importCsv()
const nmDocs = 'C:\\Users\\vle2fe\\Documents\\Data\\Rng_200622\\nmDocs' ; // directory of NormMaster PDF documents (searched by numbersFetch(), parsed by readPdf())
const dptFiles = 'C:\\Users\\vle2fe\\Documents\\Data\\Rng_200622\\DPT' ; // directory of DPT spectrum files, listed by allDpts()
// Shared mutable state filled by the import stages.
let data = [ ] ; // parsed CSV rows, appended by importCsv()
let materials = { } ; // material objects keyed by material name, built by allMaterials()
let samples = [ ] ; // sample objects collected by allSamples() and posted by saveSamples()
let normMaster = { } ; // NormMaster entries keyed by material number, loaded by getNormMaster()
// TODO: integrate measurement device information from DPT names using different users
// TODO: supplier: other for supplierless samples
// TODO: BASF twice, BASF as color
// TODO: trim color names
// TODO: duplicate kf values
2020-06-25 10:44:55 +02:00
// Start the import. The rejection handler keeps a failed stage from ending up
// as an unhandled promise rejection and marks the process as failed.
main().catch(err => {
  console.error(err);
  process.exitCode = 1;
});
/**
 * Runs one stage of the import.
 *
 * The stage used to be chosen by editing `if (0)` / `if (1)` literals; it is
 * now selected through `mode`. The default, 'dpt', is the stage that was
 * enabled before.
 *
 * @param {string} [mode='dpt'] - one of 'materials', 'samples', 'dpt', 'kfvz', 'pdf'
 * @returns {Promise<void>}
 * @throws {Error} if `mode` is not one of the known stages
 */
async function main(mode = 'dpt') {
  switch (mode) {
    case 'materials': // materials
      await getNormMaster();
      await importCsv();
      await allMaterials();
      fs.writeFileSync('./data_import/materials.json', JSON.stringify(materials));
      await saveMaterials();
      break;
    case 'samples': // samples
      await importCsv();
      await allSamples();
      await saveSamples();
      break;
    case 'dpt': // DPT
      await allDpts();
      break;
    case 'kfvz': // KF/VZ
      await importCsv();
      await allKfVz();
      break;
    case 'pdf': // pdf test
      console.log(await readPdf('N28_BN22-O010_2018-03-08.pdf'));
      break;
    default:
      throw new Error('unknown import mode: ' + mode);
  }
}
/**
 * Reads the metadata CSV (decoded as win1252) and appends every parsed row to
 * the module-level `data` array.
 *
 * @returns {Promise<void>} resolves when the whole file has been parsed;
 *   rejects if reading, decoding or parsing emits an error
 */
async function importCsv() {
  await new Promise((resolve, reject) => {
    const source = fs.createReadStream(metadata);
    const decoder = iconv.decodeStream('win1252');
    const parser = csv();

    // pipe() does not forward errors, so every stage needs its own handler.
    source.on('error', reject);
    decoder.on('error', reject);
    parser
      .on('error', reject)
      .on('data', row => {
        data.push(row);
      })
      .on('end', () => {
        console.info('CSV file successfully processed');
        resolve();
      });

    source.pipe(decoder).pipe(parser);
  });
}
/**
 * Uploads every DPT file in `dptFiles` whose name contains a known sample
 * number as a 'spectrum' measurement.
 *
 * File names are matched against `<prefix>_<sample number>_<digits>.DPT`;
 * files without a matching sample in the database are skipped. A failed
 * upload is logged and the loop continues with the next file.
 *
 * @returns {Promise<void>}
 * @throws {Error} if the server has no measurement template named 'spectrum'
 */
async function allDpts() {
  // NOTE(review): credentials are hard-coded in the source.
  const auth = { username: 'admin', password: 'Abc123!#' };

  const templateRes = await axios({
    method: 'get',
    url: 'http://localhost:3000/template/measurements',
    auth
  });
  const spectrumTemplate = templateRes.data.find(e => e.name === 'spectrum');
  if (!spectrumTemplate) {
    throw new Error('measurement template "spectrum" not found');
  }
  const measurement_template = spectrumTemplate._id;

  const sampleRes = await axios({
    method: 'get',
    url: 'http://localhost:3000/samples?status=all',
    auth
  });
  const sampleIds = {}; // sample number -> database id
  sampleRes.data.forEach(sample => {
    sampleIds[sample.number] = sample._id;
  });

  const regex = /.*?_(.*?)_(\d+|\d+_\d+)\.DPT/; // group 1: sample number
  for (const fileName of fs.readdirSync(dptFiles)) {
    const regexRes = regex.exec(fileName);
    if (!regexRes || !sampleIds[regexRes[1]]) { // no matching sample
      continue;
    }
    console.log(fileName);
    const content = fs.readFileSync(dptFiles + '\\' + fileName, 'utf-8');
    const data = {
      sample_id: sampleIds[regexRes[1]],
      values: {
        dpt: content.split('\r\n').map(line => line.split(','))
      },
      measurement_template
    };
    await axios({
      method: 'post',
      url: 'http://localhost:3000/measurement/new',
      auth,
      data
    }).catch(err => {
      console.log(fileName);
      // err.response is missing when the request never got an answer
      console.error(err.response ? err.response.data : err.message);
    });
  }
}
/**
 * Posts the KF and VZ values of every imported CSV row as 'kf' and 'vz'
 * measurements of the sample with the row's sample number.
 *
 * Rows without a sample number are skipped. A failed upload is logged and
 * the loop continues.
 *
 * @returns {Promise<void>}
 * @throws {Error} if the server has no measurement template named 'kf' or 'vz'
 */
async function allKfVz() {
  // NOTE(review): credentials are hard-coded in the source.
  const auth = { username: 'admin', password: 'Abc123!#' };

  const templateRes = await axios({
    method: 'get',
    url: 'http://localhost:3000/template/measurements',
    auth
  });
  const templateId = name => {
    const template = templateRes.data.find(e => e.name === name);
    if (!template) {
      throw new Error('measurement template "' + name + '" not found');
    }
    return template._id;
  };
  const kf_template = templateId('kf');
  const vz_template = templateId('vz');

  const sampleRes = await axios({
    method: 'get',
    url: 'http://localhost:3000/samples?status=all',
    auth
  });
  const sampleIds = {}; // sample number -> database id
  sampleRes.data.forEach(sample => {
    sampleIds[sample.number] = sample._id;
  });

  // Posts one measurement; errors are logged, never thrown.
  const postMeasurement = (sampleNumber, measurement_template, values) => axios({
    method: 'post',
    url: 'http://localhost:3000/measurement/new',
    auth,
    data: {
      sample_id: sampleIds[sampleNumber],
      measurement_template,
      values
    }
  }).catch(err => {
    console.log(sampleNumber);
    // err.response is missing when the request never got an answer
    console.error(err.response ? err.response.data : err.message);
  });

  for (const [index, sample] of data.entries()) {
    console.info(` ${index} / ${data.length} `);
    const sampleNumber = sample['Sample number'];
    if (sampleNumber === '') {
      continue;
    }
    if (sample['KF in Gew%']) {
      await postMeasurement(sampleNumber, kf_template, {
        'weight %': sample['KF in Gew%'],
        'standard deviation': sample['Stabwn']
      });
    }
    if (sample['VZ (ml/g)']) {
      await postMeasurement(sampleNumber, vz_template, {
        vz: sample['VZ (ml/g)']
      });
    }
  }
}
/**
 * Builds the module-level `samples` list from the imported CSV rows.
 *
 * A row is used only if it has a sample number, a supplier and a type, and
 * if its (trimmed) material name exists in the database. The color is taken
 * from the first source that yields one:
 *   1. the material's number entry equal to the row's material number,
 *   2. a material number entry whose color contains the row's color text,
 *   3. the color of the main sample (sample number before the first '_').
 * Rows for which none of these apply are dropped.
 *
 * @returns {Promise<void>}
 */
async function allSamples() {
  // NOTE(review): credentials are hard-coded in the source.
  const auth = { username: 'admin', password: 'Abc123!#' };

  const materialRes = await axios({
    method: 'get',
    url: 'http://localhost:3000/materials?status=all',
    auth
  });
  const dbMaterials = {}; // material name -> material
  materialRes.data.forEach(m => {
    dbMaterials[m.name] = m;
  });

  const sampleRes = await axios({
    method: 'get',
    url: 'http://localhost:3000/samples?status=all',
    auth
  });
  const sampleColors = {}; // sample number -> color
  sampleRes.data.forEach(sample => {
    sampleColors[sample.number] = sample.color;
  });

  for (const [index, sample] of data.entries()) {
    console.info(` ${index} / ${data.length} `);
    // TODO: wait for decision about samples without suppliers/color/type
    if (sample['Sample number'] === '' || sample['Supplier'] === '' || sample['Granulate/Part'] === '') {
      continue;
    }
    const material = dbMaterials[trim(sample['Material name'])];
    if (!material) { // could not find material, skipping sample
      continue;
    }
    console.log(sample['Material name']);
    console.log(material._id);

    const materialNumber = sample['Material number'];
    const colorText = sample['Color'];
    const mainSampleNumber = sample['Sample number'].split('_')[0];

    // TODO: fix because of false material/material number
    const numberEntry = materialNumber !== ''
      ? material.numbers.find(e => e.number === materialNumber)
      : undefined;
    // A color text that matches no entry used to crash on `.color` of undefined;
    // now the next color source is tried instead.
    const colorEntry = !numberEntry && colorText
      ? material.numbers.find(e => e.color && e.color.indexOf(colorText) >= 0)
      : undefined;

    let color;
    if (numberEntry) {
      color = numberEntry.color;
    }
    else if (colorEntry) {
      color = colorEntry.color;
    }
    else if (sampleColors[mainSampleNumber]) { // derive color from main sample for kf/vz
      color = sampleColors[mainSampleNumber];
    }
    else { // TODO: no color information at all
      continue;
    }

    samples.push({
      number: sample['Sample number'],
      type: sample['Granulate/Part'],
      batch: sample['Charge/batch granulate/part'] || '',
      material_id: material._id,
      notes: {
        comment: sample['Comments']
      },
      color
    });
  }
}
/**
 * Posts every entry of the module-level `samples` list to the server, one
 * after another. A failed request is logged together with the sample and
 * does not stop the loop.
 *
 * @returns {Promise<void>}
 */
async function saveSamples() {
  for (const [i, sample] of samples.entries()) {
    console.info(` ${i} / ${samples.length} `);
    await axios({
      method: 'post',
      url: 'http://localhost:3000/sample/new',
      auth: { // NOTE(review): credentials are hard-coded in the source
        username: 'admin',
        password: 'Abc123!#'
      },
      data: sample
    }).catch(err => {
      console.log(sample);
      // err.response is missing when the request never got an answer
      console.error(err.response ? err.response.data : err.message);
    });
  }
  console.info('saved all samples');
}
/**
 * Builds the module-level `materials` map (keyed by trimmed material name)
 * from the imported CSV rows.
 *
 * The first row of a material creates the entry, including the reinforcement
 * percentages parsed from 'Reinforcing material' and the number list from
 * numbersFetch(). Later rows of the same material only add material numbers
 * or colors that are not yet listed. Rows without sample number or supplier
 * are ignored. The row's 'Material name' is overwritten with its trimmed form.
 *
 * @returns {Promise<void>}
 */
async function allMaterials() {
  for (const [index, sample] of data.entries()) {
    // TODO: wait for decision about supplierless samples
    if (sample['Sample number'] === '' || sample['Supplier'] === '') {
      continue;
    }
    sample['Material name'] = trim(sample['Material name']);
    const name = sample['Material name'];
    const color = sample['Color'];

    if (Object.prototype.hasOwnProperty.call(materials, name)) { // material already found at least once
      const numbers = materials[name].numbers;
      if (sample['Material number'] !== '') {
        const number = stripSpaces(sample['Material number']);
        if (!numbers.find(e => e.number === number)) { // new material number
          const colorOnly = numbers.find(e => e.color === color && e.number === '');
          if (colorOnly) { // color already in list, only number missing
            colorOnly.number = number;
          }
          else {
            numbers.push({ color, number });
          }
        }
      }
      else if (color !== '') {
        // Compare the raw color, which is also what is stored; comparing a
        // space-stripped color never matched colors that contain spaces.
        if (!numbers.find(e => e.color === color)) { // new material color
          numbers.push({ color, number: '' });
        }
      }
    }
    else { // new material
      console.info(` ${index} / ${data.length} ${name} `);
      const reinforcement = sample['Reinforcing material'];
      // Percentage following the given prefix, or 0 when the prefix is absent.
      const percentage = pattern => {
        const match = pattern.exec(reinforcement);
        return match ? Number(match[1]) : 0;
      };
      materials[name] = {
        name,
        supplier: sample['Supplier'],
        group: sample['Material'],
        mineral: percentage(/M(\d+)/),
        glass_fiber: percentage(/GF(\d+)/),
        carbon_fiber: percentage(/CF(\d+)/)
      };
      materials[name].numbers = await numbersFetch(sample);
      console.log(materials[name]);
    }
  }
}
/**
 * Posts every entry of the module-level `materials` map to the server, one
 * after another. A failed request is logged together with the material and
 * does not stop the loop.
 *
 * @returns {Promise<void>}
 */
async function saveMaterials() {
  for (const material of Object.values(materials)) {
    await axios({
      method: 'post',
      url: 'http://localhost:3000/material/new',
      auth: { // NOTE(review): credentials are hard-coded in the source
        username: 'admin',
        password: 'Abc123!#'
      },
      data: material
    }).catch(err => {
      console.log(material);
      // err.response is missing when the request never got an answer
      console.error(err.response ? err.response.data : err.message);
    });
  }
  console.info('saved all materials');
}
/**
 * Collects the color/material-number list for the material of a CSV row.
 *
 * NormMaster entries are looked up by the row's material number or, if it
 * has none, by the space-stripped material name. The first entry whose PDF
 * is present in `nmDocs` and yields table rows provides the list; the row's
 * own color/number pair is appended when its number is not in that list.
 * Without any PDF result the list is just the row's own pair, or empty when
 * the row has neither color nor number.
 *
 * @param {Object} sample - CSV row; reads 'Material number', 'Material name', 'Color'
 * @returns {Promise<Array<{color: string, number: string}>>}
 */
async function numbersFetch(sample) {
  let candidates;
  if (sample['Material number']) { // sample has a material number
    const entry = normMaster[stripSpaces(sample['Material number'])];
    candidates = entry ? [entry] : [];
  }
  else { // try finding via material name
    const nameSpaceless = stripSpaces(sample['Material name']);
    candidates = Object.keys(normMaster)
      .filter(key => normMaster[key].nameSpaceless === nameSpaceless)
      .map(key => normMaster[key]);
  }

  let res = [];
  for (const [i, entry] of candidates.entries()) {
    const docName = entry.doc.replace(/ /g, '_');
    // Downloading missing documents is currently disabled:
    // if (!fs.readdirSync(nmDocs).find(e => e.indexOf(docName) >= 0)) { // document not loaded
    //   await getNormMasterDoc(entry.url.replace(/ /g, '%20'));
    // }
    // if (!fs.readdirSync(nmDocs).find(e => e.indexOf(docName) >= 0)) { // document not loaded
    //   console.info('Retrying download...');
    //   await getNormMasterDoc(entry.url.replace(/ /g, '%20'), 2.2);
    // }
    // if (!fs.readdirSync(nmDocs).find(e => e.indexOf(docName) >= 0)) { // document not loaded
    //   console.info('Retrying download again...');
    //   await getNormMasterDoc(entry.url.replace(/ /g, '%20'), 5);
    // }
    const file = fs.readdirSync(nmDocs).find(e => e.indexOf(docName) >= 0);
    if (file) { // document loaded
      res = await readPdf(file);
    }
    if (res.length > 0) { // got results
      break;
    }
    // `i` is a number here; with a for...in string index `i + 1` concatenated
    // and this message fired before the last candidate was reached.
    if (i + 1 >= candidates.length) {
      console.error('Download failed!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!');
    }
  }

  if (res.length === 0) { // no results
    if (sample['Color'] !== '' || sample['Material number'] !== '') {
      return [{ color: sample['Color'], number: sample['Material number'] }];
    }
    return [];
  }
  if (!res.find(e => e.number === sample['Material number'])) { // sometimes norm master does not include sample number even if listed
    res.push({ color: sample['Color'], number: sample['Material number'] });
  }
  return res;
}
/**
 * Fills the module-level `normMaster` map (keyed by material number).
 *
 * By default the map is loaded from './data_import/normMaster.json'. With
 * `fetchAgain` the trade-name list is downloaded, parsed with a regular
 * expression and written back to that file.
 *
 * @param {boolean} [fetchAgain=false] - download and re-parse instead of using the cached file
 * @returns {Promise<void>}
 */
async function getNormMaster(fetchAgain = false) {
  if (!fetchAgain) {
    // The encoding belongs to readFileSync; it used to be passed to JSON.parse.
    normMaster = JSON.parse(fs.readFileSync('./data_import/normMaster.json', 'utf-8'));
    return;
  }

  console.info('fetching norm master...');
  const res = await axios({
    method: 'get',
    url: 'http://rb-normen.bosch.com/cgi-bin/searchRBNorm4TradeName'
  });
  console.info('finding documents...');
  // const regex = /<tr>.*?<td>.*?<\/span>(.*?)<\/td><td>(\d+)<\/td>.*?<a href="(.*?)"/gm;
  const regex = /<tr>.*?<td>.*?<\/span>(.*?)<\/td><td>(\d+)<\/td><td>40.*?<a href="(.*?)".*?<\/a>(.*?)<\/td>/gm; // only valid materials
  let match;
  while ((match = regex.exec(res.data)) !== null) {
    // groups: 1 = name, 2 = number, 3 = document url, 4 = document title
    normMaster[match[2]] = {
      name: match[1],
      nameSpaceless: stripSpaces(match[1]),
      number: match[2],
      url: match[3],
      doc: match[4]
    };
  }
  fs.writeFileSync('./data_import/normMaster.json', JSON.stringify(normMaster));
}
/**
 * Opens `url` in a Chrome instance configured to save PDFs into `nmDocs`,
 * waits a fixed time for the download and closes the browser again.
 *
 * Navigation errors are logged and the browser is still closed after the
 * wait. A failure to start or quit the browser rejects the returned promise
 * (it used to leave the promise pending forever).
 *
 * @param {string} url - document URL to open
 * @param {number} [timing=1] - multiplier for the download wait time
 * @returns {Promise<void>}
 */
async function getNormMasterDoc(url, timing = 1) {
  console.log(url);
  const options = new chrome.Options();
  options.setUserPreferences({
    "download.default_directory": nmDocs,
    "download.prompt_for_download": false,
    "download.directory_upgrade": true,
    "plugins.always_open_pdf_externally": true
  });
  const driver = await new Builder().forBrowser('chrome').setChromeOptions(options).build();
  let timeout = 7000 * timing;
  try {
    await driver.get(url);
    if (await driver.getCurrentUrl() !== 'https://rb-wam-saml.bosch.com/tfim/sps/normmaster/saml20/login') { // got document selection page
      timeout = 11000 * timing;
      await driver.executeScript('Array.prototype.slice.call(document.querySelectorAll(\'.functionlink\')).filter(e => e.innerText === \'English\')[0].click()').catch(() => { timeout = 0; });
    }
  }
  catch (err) {
    // best effort: a failed navigation must not abort the import
    console.error(err);
  }
  finally {
    // wait until download is finished
    await new Promise(resolve => setTimeout(resolve, timeout));
    await driver.quit();
  }
}
/**
 * Extracts the color/material-number table from a NormMaster PDF in `nmDocs`.
 *
 * Text items are scanned until the header text 'color designation supplier'
 * appears (spread over up to three consecutive items). From there items are
 * grouped into rows by their position, with '$' inserted between cells. The
 * table ends at 'release document' / 'normative references', after
 * `countdown` items without a material number, or at the end of the file.
 * Only rows starting with ten digits are returned.
 *
 * A parser error is logged; the promise still resolves with the rows found
 * so far (usually none).
 *
 * @param {string} file - file name inside `nmDocs`
 * @returns {Promise<Array<{color: string, number: string}>>}
 */
function readPdf(file) {
  return new Promise(resolve => {
    const countdown = 100; // value for text timeout
    let table = 0; // > 0 when in correct table area
    const rows = []; // found table rows
    let lastY = 0; // y of last row
    let lastX = 0; // right x of last item
    let lastText = ''; // text of last item
    let lastLastText = ''; // text of last last item

    // Resolves with the rows that start with a ten digit material number.
    const finish = () => {
      const tableRows = rows.filter(e => /^\d{10}/m.test(stripSpaces(e))); // filter non-table rows
      resolve(tableRows.map(e => {
        const cells = e.split('$');
        return { color: cells[3], number: stripSpaces(cells[0]) };
      }));
    };

    new pdfReader.PdfReader().parseFileItems(nmDocs + '\\' + file, (err, item) => {
      if (err) {
        console.error(err);
      }
      if (item && item.text) {
        const recentText = stripSpaces(lastLastText + lastText + item.text).toLowerCase();
        // second spelling presumably covers a typo in some documents - TODO confirm
        if (recentText.indexOf('colordesignationsupplier') >= 0 || recentText.indexOf('colordesignatiomsupplier') >= 0) { // table area starts
          table = countdown;
        }
        if (table > 0) {
          if (item.y - lastY > 0.8 && Math.abs(item.x - lastX) > 5) { // new row
            lastY = item.y;
            rows.push(item.text);
          }
          else if (rows.length > 0) { // still the same row
            rows[rows.length - 1] += (item.x - lastX > 1.1 ? '$' : '') + item.text; // push to row, detect if still same cell
          }
          // else: no row started yet, nothing to append to
          lastX = (item.w * 0.055) + item.x;
          if (/\d \d\d\d \d\d\d \d\d\d/.test(item.text)) { // material number seen, restart timeout
            table = countdown;
          }
          table--;
          const text = item.text.toLowerCase();
          if (table <= 0 || text.indexOf('release document') >= 0 || text.indexOf('normative references') >= 0) { // table area ended
            table = -1;
            finish();
          }
        }
        lastLastText = lastText;
        lastText = item.text;
      }
      if (!item && table !== -1) { // document ended
        finish();
      }
    });
  });
}
/**
 * Removes every space character (U+0020) from a string.
 *
 * @param {string} s - input text; any falsy value is treated as empty
 * @returns {string} `s` without spaces, or '' for falsy input
 */
function stripSpaces(s) {
  if (!s) {
    return '';
  }
  return s.split(' ').join('');
}
/**
 * Removes whitespace at the start and end of every line of a string.
 *
 * @param {string} s - input text
 * @returns {string} the text with line-leading and line-trailing whitespace removed
 */
function trim(s) {
  const edgeWhitespace = /^\s+|\s+$/gm;
  const trimmed = s.replace(edgeWhitespace, '');
  return trimmed;
}