2
Fork 0
opus-data/ReadOpus.R

417 lines
18 KiB
R

#Code adapted from github.com/philipp-baumann/simplerspec
read_opus_univ <- function(file_path, limit){
`%do%` <- foreach::`%do%`
extract <- 'spc'
# Avoid `R CMD check` NOTE: no visible binding for global variable ...
x <- y <- i <- npt <- NULL
if (!file.exists(file_path)) {
stop(paste0("File does not exist"))
}
try({
# Read entire content of file as bytes
pa <- hexView::readRaw(file_path, offset = 0,
nbytes = file.info(file_path)$size, human = "char",
size = 1, endian = "little")
# Get raw vector
pr <- pa$fileRaw
# Read byte positions for selected 3 letter strings that flag important
# spectral information -----------------------------------------------------
# Get positions of "END" strings
end <- grepRaw("END", pr, all = TRUE) + 11
# Get all positions of "NPT" (number of points) string
npt_all <- grepRaw("NPT", pr, all = TRUE) + 3
# Get frequency of first (FXV) and last point (LXV) positions
fxv_all <- grepRaw("FXV", pr, all = TRUE) + 7
lxv_all <- grepRaw("LXV", pr, all = TRUE) + 7
# For some files, the number of positions where "FXV" and "LXV" occur
# are not equal, e.g. for the file in
# data/soilspec_esal_bin/BF_mo_01_soil_cal.0 ; As a consequence, the
# fist and last point numbers (e.g. wavenumber or points for interferograms)
# are not correctly read. This results in an error when trying to calculate
# the wavenumbers; The below code is a quick and dirty fix to remove
# FXV values that don't have LXV values and vice versa
# (difference between "LXV" and "FXV" for a spectral data block
# should be 16) ------------------------------------------------------------
if (length(fxv_all) > length(lxv_all)) {
diff_lxv_fxv <- lapply(lxv_all, function(x) x - fxv_all)
# Return list of logical vectors indicating whether difference of fxv
# and lxv is 16 (distance of 16 bytes)
lxv_fxv_min <- lapply(diff_lxv_fxv, function(x) x == 16)
fxv_list <- rep(list(fxv_all), length(fxv_all))
fxv_all <- foreach::foreach(
x = 1:length(fxv_list), y = 1:length(lxv_fxv_min),
.combine = 'c') %do% {
fxv_list[[x]][lxv_fxv_min[[y]]]
}
}
if (length(lxv_all) > length(fxv_all)) {
diff_fxv_lxv <- lapply(fxv_all, function(x) x - lxv_all)
# Return list of logical vectors indicating whether difference of fxv
# and lxv is 16 (distance of 16 bytes)
fxv_lxv_min <- lapply(diff_fxv_lxv, function(x) x == -16)
lxv_list <- rep(list(lxv_all), length(lxv_all))
lxv_all <- foreach::foreach(
x = 1:length(lxv_list), y = 1:length(fxv_lxv_min),
.combine = 'c') %do% {
lxv_list[[x]][fxv_lxv_min[[y]]]
}
}
# Reduce size of npt_all -----------------------------------------------------
# Some files have an extra "NPT" string without FXV, LXV, and spectral block
if (length(npt_all) != length(fxv_all)) {
diff_npt_fxv <- lapply(npt_all, function(x) fxv_all - x)
min_bigger0_smallerequal40 <- lapply(diff_npt_fxv, function(x) {
which_min_bigger0 <- x == min(x[x > 0])
which_smallerequal40 <- x <= 40
which_min_bigger0 & which_smallerequal40
}
)
which_npt_valid <- sapply(min_bigger0_smallerequal40,
function(x) any(x == TRUE))
npt_all <- npt_all[which_npt_valid]
}
# --------------------------------------------------------------------------
## Read basic spectral information =========================================
# Read all number of points (NPT) at once
NPT <- foreach::foreach(npt = npt_all, .combine = 'c') %do% {
hexView::readRaw(
file_path, offset = npt, nbytes = 12, human = "int", size = 4)[[5]][2]
}
# Specific error for file: <"data/soilspec_eth_bin/CI_tb_05_soil_cal.2">
# "Invalid number of bytes" when trying to read spectra
# -> Reason: NPT at position 1 is 995236000 !!!
# Omit this entry in NPT and corresponding byte position in npt_all
# Quick fix ----------------------------------------------------------------
npt_all <- npt_all[NPT < 40000]
NPT <- NPT[NPT < 40000]
# --------------------------------------------------------------------------
# Figure out how many spectral blocks exist and select final spectra
# positions; end_spc is vector of offsets where spectra start
if (length(end) == 1) {
end_spc <- end
} else {
end_spc <- end[diff(end) > 4 * min(NPT)]
}
## Find final spectra information block positions
## that belong to spectra data =============================================
# Save positions that contain possible spectra data block
# standard parameters
spc_param_list <- list(
'npt' = npt_all,
'fxv' = fxv_all,
'lxv' = lxv_all
)
## Return list of final parameters corresponding to data blocks that contain
## spectra, elements are npt (number of points),
## fxv (frequency of first point) and lxv (frequency of last point);
## returned values represent byte positions in the file where spectra
## parameters are stored. --------------------------------------------------
return_spc_param <- function(end_spc, spc_param_list) {
# Difference between any NPT position vector elements end_spc element
# (end_spc[i] is a scalar, constant value at iteration i)
diff_l <- lapply(end_spc, function(x) npt_all - x)
# Test of any vector in list contains -164 (returns list of vectors
# TRUE or FALSE)
isminus164 <- lapply(diff_l, function(x) x == -164)
# Find minimum positive difference within each list
if (length(diff_l) == 1) {sel_min <- list(TRUE)} else {
sel_min <- lapply(diff_l,
function(x) {if (any(x > 0)) {x == min(x[x > 0])} else {x == -164}})
}
# Set FALSE repeated vector in sel_min element where TRUE positions are
# duplicated
which_elem_dupl <- which(duplicated(sapply(sel_min, which)))
if (length(which_elem_dupl) > 1) {
sel_min[which_elem_dupl] <- NULL
# Reduce end_spc with duplicated elements
end_spc <- end_spc[-which_elem_dupl]
}
# Select minimum difference NPT position for each END position
npt_min <- Map(function(x, y) x[y],
rep(list(npt_all), length(end_spc)), sel_min)
npt_min <- Filter(length, npt_min)
# Select spectra parameters that immediately follow END positions before
# corresponding spectra
param_min <- foreach::foreach(i = 1:length(spc_param_list),
.final = function(i) setNames(i, names(spc_param_list))) %do% {
Map(function(x, y) x[y],
rep(list(spc_param_list[[i]]), length(end_spc)), sel_min)
}
# Test if any difference in list is -164
if (any(unlist(isminus164) == TRUE)) {
# Find all list element that contain TRUE in logical vector
minus164 <- lapply(isminus164, function(x) Find(isTRUE, x))
# Return element position of last TRUE in list
where <- function(f, x) {
vapply(x, f, logical(1))
}
last_minus164 <- Position(isTRUE, where(isTRUE, minus164),
right = TRUE)
# Replace positions in parameter list are at positions of last
# -164 difference between end_spc element and NPT position
param_min <- foreach::foreach(i = 1:length(spc_param_list),
.final = function(i) setNames(i, names(spc_param_list))) %do% {
param_min[[i]][[last_minus164]] <-
spc_param_list[[i]][isminus164[[last_minus164]]]
param_min[[i]]
}
}
# Return list of final parameters corresponding to data blocks that
# contain spectra
param_spc <- lapply(param_min, unlist)
param_spc$end_spc <- end_spc
param_spc
}
# Save spectra parameter list
param_spc <- return_spc_param(end_spc, spc_param_list)
# Create individual vectors containing spectra parameters
npt_spc <- param_spc[["npt"]]
fxv_spc <- param_spc[["fxv"]]
lxv_spc <- param_spc[["lxv"]]
end_spc <- param_spc[["end_spc"]]
# Read number of points corresponding to spectra in file -------------------
NPT_spc <- foreach::foreach(i = 1:length(npt_spc), .combine = 'c') %do% {
hexView::readRaw(
file_path, offset = npt_spc[i],
nbytes = 12, human = "int", size = 4)[[5]][2]
}
# Delete NPT with negative signs
NPT_spc <- NPT_spc[NPT_spc > 0]
## Read all spectra ========================================================
spc <- Map(function(end, NPT) hexView::readRaw(file_path, width = NULL,
offset = end - 4, nbytes = NPT * 4,
human = "real", size = 4, endian = "little")[[5]], end_spc, NPT_spc)
# Read FXV and LXV and calculate wavenumbers ------------------------------
FXV_spc <- foreach::foreach(i = 1:length(fxv_spc), .combine = 'c') %do% {
hexView::readRaw(file_path,
offset = fxv_spc[i], nbytes = 16, human = "real", size = 8)[[5]][1]
}
LXV_spc <- foreach::foreach(i = 1:length(lxv_spc), .combine = 'c') %do% {
hexView::readRaw(file_path,
offset = lxv_spc[i], nbytes = 16, human = "real", size = 8)[[5]][1]
}
# Calculate wavenumbers
wavenumbers <- foreach::foreach(i = 1:length(FXV_spc)) %do% {
rev(seq(LXV_spc[i], FXV_spc[i],
(FXV_spc[i] - LXV_spc[i]) / (NPT_spc[i] - 1)))
}
## Assigning list of intially read spectra depending on block type =========
# Assign an index name to the spectra and parameters for reading
names(end_spc) <- paste0("idx", 1:length(end_spc))
names(spc) <- paste0("idx", 1:length(spc))
names(NPT_spc) <- paste0("idx", 1:length(NPT_spc))
names(FXV_spc) <- paste0("idx", 1:length(FXV_spc))
names(wavenumbers) <- paste0("idx", 1:length(wavenumbers))
# Check if elements in FXV_spc (frequency of first point) are equal to 0;
# these are interferogram spectra ------------------------------------------
which_Ig <- FXV_spc[which(FXV_spc == 0)]
Ig_assigned <- if (length(which_Ig) == 0) {
NULL
} else if (length(which_Ig) == 1) {
list(
spc_idx = names(which_Ig),
spc_code = "IgSm"
)
} else if (length(which_Ig) == 3) {
list(
spc_idx = names(which_Ig)[c(1, 3)],
spc_code = c("IgSm", "IgRf")
)
} else {
list(
spc_idx = names(which_Ig),
spc_code = c("IgSm", "IgRf")
)
}
na_assigned <- list(
spc_idx = NULL,
spc_code = NULL
)
if (length(which_Ig) == 3) {
# Assign duplicated interferogram spectrum to 'not available' assigned
na_assigned <- list(
spc_idx = names(which_Ig)[2],
spc_code = NA
)
}
# Remove NA assigned spectra in spc list -------------------------------------
if (!is.null(na_assigned$spc_idx)) {
spc[na_assigned$spc_idx] <- NULL
# Remove wavenumbers with NA assigned spectra in spc list
wavenumbers[na_assigned$spc_idx] <- NULL
}
# Assign single channel spectra if present in file -------------------------
# Return idx (index names) of all remaining spectra that are not
# interferograms
notIg <- names(spc)[!names(spc) %in%
c(Ig_assigned$spc_idx, na_assigned$spc_idx)]
# Check if the MIR range was measured
wavenumbers_mir <- lapply(names(wavenumbers[notIg]),
function(i) spc[[i]][wavenumbers[notIg][[i]] < 2392 &
wavenumbers[notIg][[i]] > 2358])
is_mir <- any(sapply(wavenumbers_mir, function(x) length(x) != 0))
if (isTRUE(is_mir)) {
# Calculate peak ratio for absorbance at around 2392 cm^(-1)
# and 2358 cm^(-1)
peak_ratio <- lapply(
lapply(names(wavenumbers[notIg]),
function(i) spc[[i]][wavenumbers[notIg][[i]] < 2392 &
wavenumbers[notIg][[i]] > 2358]),
function(j) j[[1]] / j[[length(j)]]
)
names(peak_ratio) <- names(spc[notIg])
# Single channel (Sc) assignment list
which_Sc <- names(which(peak_ratio > 2))
} else {
peak_ratio <- lapply(
lapply(names(wavenumbers[notIg]),
function(i) spc[[i]][wavenumbers[notIg][[i]] < 5340 &
wavenumbers[notIg][[i]] > 5318]),
function(j) j[[1]] / j[[length(j)]]
)
names(peak_ratio) <- names(spc[notIg])
# Single channel (Sc) assignment list
which_Sc <- names(which(peak_ratio < 0.9))
}
# Check for single channel, exclude spectral blocks already assigned to
# interferograms
Sc_assigned <- if (length(which_Sc) == 0) {
NULL
} else if (length(which_Sc) == 1) {
list(
spc_idx = which_Sc,
spc_code = "ScSm"
)
} else {
list(
spc_idx = which_Sc,
spc_code = c("ScSm", "ScRf")
)
}
# Assign corrected and uncorrected (if present) ----------------------------
# AB spectra list
which_AB <- names(spc)[!names(spc) %in%
c(Ig_assigned[["spc_idx"]], na_assigned[["spc_idx"]],
Sc_assigned[["spc_idx"]])]
AB_assigned <- if (length(which_AB) == 1) {
list(
spc_idx = which_AB,
spc_code = "spc"
)
} else {
list(
spc_idx = which_AB,
spc_code = c("spc_nocomp", "spc")
)
}
# Read result spectrum with new offset (no `-4`) when atmospheric
# compensation was done by the OPUS software; replace the spectrum position
# with index name idx that corresponds to final spectrum after atmospheric
# compensation; OPUS files from particular spectrometers/OPUS software
# versions do still need the same offset end_spc[[spc_idx]] - 4 as the other
# spectra types; new argument atm_comp_minus4offset (default FALSE) is a
# quick fix to read files with different offsets after atmospheric
# compensation -------------------------------------------------------------
if (length(which_AB) == 2 && !atm_comp_minus4offset) {
spc[[which_AB[length(which_AB)]]] <-
hexView::readRaw(file_path, width = NULL,
offset = end_spc[which_AB[length(which_AB)]],
nbytes = NPT_spc[which_AB[length(which_AB)]] * 4,
human = "real", size = 4, endian = "little")[[5]]
}
# Assign spectra type for final spectra in element names of spc list -------
# Combine spectral assignments lists
list_assigned <- list(
'Ig' = Ig_assigned,
'Sc' = Sc_assigned,
'AB' = AB_assigned
)
# Transpose spectra assignment list, first remove NULL elements in list
list_assigned_t <- purrr::transpose(
Filter(Negate(function(x) is.null(unlist(x))), list_assigned)
)
# Save spectra code (spc_code)
# in character vector
spc_code <- unlist(list_assigned_t[["spc_code"]])
# Order spc_idx from 1 to n spectra (n = length of end_spc)
order_spc <- as.numeric(
sub(".*idx", "", unlist(list_assigned_t[["spc_idx"]])))
spc_type <- spc_code[order(order_spc)]
# Set spectrum type as element names of spectra list (spc)
names(spc) <- spc_type
# Set spectrum type in wavenumbers list
names(wavenumbers) <- spc_type
## Get additional parameters from OPUS binary file =========================
# Optics parameters --------------------------------------------------------
csf_all <- grepRaw("CSF", pr, all = TRUE) + 7 # y-scaling factor
# Read only CSF byte positions that correspond to final spectra
CSF <- lapply(csf_all[npt_all %in% npt_spc],
function(csf) hexView::readRaw(
file_path, offset = csf, nbytes = 8, human = "real", size = 8)[[5]][1])
# Scale all spectra with y-scaling factor if any of spectra types present
# in file are not 1 --------------------------------------------------------
# Set names of CSF elements equal to spectra list element names
names(CSF) <- names(spc)
if (any(unlist(CSF) != 1)) {
# Return all elements in CSF that have scaling value not equal to 1
CSF_toscale <- Filter(function(x) x != 1, CSF)
# Apply scaling for spectra with CSF value not equal to 1;
# Map() returns list
spc_scaled <- Map(function(CSF, spc) CSF * spc,
unlist(CSF_toscale), spc[names(CSF_toscale)])
# Replace all spc list elements that have CSF not equal 1 with
# scaled values
spc <- replace(x = spc, list = names(CSF_toscale), values = spc_scaled)
}
## Allocate and return data from spectra in output list (out) ==============
out <- data.frame(round(wavenumbers[['spc']], limit), round(spc[['spc']], limit), row.names = NULL)
# Return spectra data and metadata contained as elements in list out
out
}) # closes try() function
}