
Gists


Comparison of NASIS or KSSL point data versus SoilGrids point query versus mass-preserving splines using the new methods soilDB::fetchSoilGrids and aqp::spc2mpspline

Thu, 08/20/2020 - 21:18
NCSS_soilgrids_mpspline_comparison.R

# get latest aqp (1.24) and soilDB (2.5.7) from GitHub
# remotes::install_github("ncss-tech/aqp", dependencies = FALSE)
# remotes::install_github("ncss-tech/soilDB", dependencies = FALSE)

# load packages
library(aqp)
library(soilDB)

# get the classic loafercreek dataset
data(loafercreek)

# or use KSSL
# loafercreek <- fetchKSSL(series = "Loafercreek")

# get the location information
locs <- data.frame(id = profile_id(loafercreek),
                   site(loafercreek)[, c("y", "x")],
                   stringsAsFactors = FALSE)

# fetch the first 6 pedon locations from SoilGrids
sgr <- fetchSoilGrids(head(locs[complete.cases(locs), ]), c("id", "y", "x"))

# get the first 6 pedons from NASIS data (loafercreek dataset)
lcr <- head(filter(loafercreek, complete.cases(locs)))
profile_id(lcr) <- paste0(profile_id(lcr), "_OG")

# promote loafercreek to same spatial CRS
coordinates(lcr) <- ~ x + y
proj4string(lcr) <- "+proj=longlat +datum=WGS84"

# create unique profile ID prior to union
profile_id(sgr) <- paste0(profile_id(sgr), "_SG")

# convert mean clay from SoilGrids to same scale as field clay content
sgr$claycombined <- sgr$clay.mean / 10
lcr$claycombined <- lcr$clay

# combine
lcsg <- aqp::union(list(sgr, lcr))

# compare in horizon-aggregated form
plot(lcsg, color = "claycombined")

# iterating by profile outside of mpspline ensures full depth per profile is captured
lcsg_spline <- aqp::union(profileApply(lcsg, function(x) {
  spc2mpspline(x, var_name = "claycombined")
}))
profile_id(lcsg_spline) <- paste0(profile_id(lcsg_spline), "_spline")

# create a combined variable for displaying field, SoilGrids and splines together
lcsg$finalclay <- lcsg$claycombined
lcsg_spline$finalclay <- lcsg_spline$claycombined_spline

# compare in spline'd form
lcsg_all <- aqp::union(list(lcsg, lcsg_spline))
plot(lcsg_all, color = "finalclay", divide.hz = FALSE)

Basic comparison of aggregation of KSSL data via group_by+summarize versus slab

Tue, 08/18/2020 - 18:57
group_by-summarize_vs_slab.R

library(aqp)
library(soilDB)

kssldat <- fetchKSSL(mlra = c("18", "22A"))

# remove invalid profiles
kssldat <- filter(kssldat, checkHzDepthLogic(kssldat)$valid)

# this truncates the whole SPC to [0,30]
kssltrunc <- trunc(kssldat, 0, 30)

# new way to do it: group_by + summarize
ksslgroup <- group_by(kssltrunc, "mlra")

# profile identity split
# ksslgroup <- group_by(kssltrunc, idname(kssltrunc))

# summarize takes one or more expressions that resolve to 1 value per group
res1 <- summarize(ksslgroup,
                  oc_mean = mean(oc, na.rm = TRUE),
                  ph_h2o_mean = mean(ph_h2o, na.rm = TRUE),
                  oc_sd = sd(oc, na.rm = TRUE),
                  ph_h2o_sd = sd(ph_h2o, na.rm = TRUE))

# slab way to do it; using 0-30cm trunc'd SPC
slabres <- aqp::slab(ksslgroup, mlra ~ oc + estimated_oc + estimated_om + ph_h2o,
                     slab.fun = function(x) {
                       foo <- c(mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
                       names(foo) <- c("Mean", "SD")
                       return(foo)
                     })

res2 <- do.call('rbind', lapply(split(slabres, f = slabres$mlra), function(mlra) {
  data.frame(mlra = unique(mlra$mlra),
             oc_mean = mean(subset(mlra, variable == "oc")$Mean, na.rm = TRUE),
             ph_h2o_mean = mean(subset(mlra, variable == "ph_h2o")$Mean, na.rm = TRUE),
             oc_sd = mean(subset(mlra, variable == "oc")$SD, na.rm = TRUE),
             ph_h2o_sd = mean(subset(mlra, variable == "ph_h2o")$SD, na.rm = TRUE))
}))

# this is using un-sliced data
res1
#   mlra  oc_mean ph_h2o_mean    oc_sd ph_h2o_sd
# 1   18 1.748223    6.097802 3.800573 0.5514775
# 2  22A 2.970130    5.700705 3.315585 0.5555779

# this is using sliced data -- similar values
res2
#      mlra  oc_mean ph_h2o_mean    oc_sd ph_h2o_sd
# 18     18 1.682727    6.090462 4.044607 0.5393427
# 22A   22A 2.972001    5.696293 3.167955 0.5505772
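To put a number on how close the two aggregation routes land, the group-wise results can be merged and differenced. A minimal sketch, assuming res1 and res2 exist as above and that res1 coerces cleanly to a data.frame (the ".grp"/".slab" suffixes are just illustrative):

# quantify agreement between group_by+summarize and slab-based aggregates
cmp <- merge(as.data.frame(res1), res2, by = "mlra", suffixes = c(".grp", ".slab"))
data.frame(mlra = cmp$mlra,
           oc_mean_diff = abs(cmp$oc_mean.grp - cmp$oc_mean.slab),
           ph_h2o_mean_diff = abs(cmp$ph_h2o_mean.grp - cmp$ph_h2o_mean.slab))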

Basic point-location REST requests to SoilGrids v2 API and wrangling of data into SoilProfileCollection object

Tue, 08/18/2020 - 16:03
soilgrids_REST.R

# basic point-location REST requests to SoilGrids v2 API and wrangling of data
# into SoilProfileCollection object
# last update: 2020/08/18

library(httr)
library(jsonlite)
library(tidyverse)

# point id, latitude and longitude as inputs
your.points <- tribble(~id,  ~lat,   ~lon,
                       "A",  37.9, -120.3,
                       "B",  38.1, -121.5)

res <- lapply(split(your.points, f = your.points$id), function(yd) {

  id <- yd$id
  lat <- yd$lat
  lon <- yd$lon

  response <- httr::GET(sprintf("https://rest.soilgrids.org/soilgrids/v2.0/properties/query?lat=%s&lon=%s", lat, lon))
  r.content <- httr::content(response, as = "text", encoding = "UTF-8")
  res <- jsonlite::fromJSON(r.content)

  extractSGLayerProperties <- function(x) {
    out <- res$properties$layers[res$properties$layers$name == x, ]$depths[[1]]

    # fix names and labels for downstream
    out <- out[, colnames(out)[grep("range", colnames(out), invert = TRUE)]]
    out <- data.frame(label = gsub("cm", "", out$label), values = out$values)
    colnames(out) <- gsub("\\.Q0\\.", "Q", colnames(out))
    colnames(out) <- gsub("Q5", "Q50", colnames(out))
    colnames(out) <- gsub("values", x, colnames(out))
    return(out)
  }

  soc <- extractSGLayerProperties("soc")
  bdod <- extractSGLayerProperties("bdod")
  phh2o <- extractSGLayerProperties("phh2o")
  clay <- extractSGLayerProperties("clay")
  cec <- extractSGLayerProperties("cec")

  # create new horizon data, merge in each property using depth label
  hz.data <- tibble(id = id, lat = lat, lon = lon, label = soc[, "label"])
  hz.data <- hz.data %>%
    merge(soc, by = "label") %>%
    merge(bdod, by = "label") %>%
    merge(phh2o, by = "label") %>%
    merge(clay, by = "label") %>%
    merge(cec, by = "label")
  rownames(hz.data) <- NULL
  return(hz.data)
})

# combine horizon data together
hz.data.all <- do.call('rbind', res)

# calculate top and bottom depths from label
spc <- separate(hz.data.all, label, sep = "-", into = c("top", "bottom"))

###
### spc is a tibble with all your data in it
###

# from here, you can do your analysis e.g. with aqp
# install if needed:
# install.packages("aqp")
# remotes::install_github("ncss-tech/aqp", dependencies = FALSE)
library(aqp)

# promote horizon data to SoilProfileCollection
depths(spc) <- id ~ top + bottom

# plot median/50th percentile SOC for spc truncated to [0,50]
plot(trunc(spc, 0, 50), color = "socQ50")

# plot low/5th percentile
plot(trunc(spc, 0, 50), color = "socQ05")

# plot high/95th percentile
plot(trunc(spc, 0, 50), color = "socQ95")

Attempts to build a decently-usable, but not quite fetchNASIS-level, gNATSGO SoilProfileCollection for stress testing the S4 methods

Mon, 08/03/2020 - 19:33
big-SPC-gNATSGO.R

library(aqp)
library(sf)
library(soilDB)
library(data.table)
library(microbenchmark)

# path to gNATSGO Geodatabase
dsn <- "~/geodata/gNATSGO/2020/gNATSGO_CONUS.gdb"

# This is one of the main bottlenecks (not the only one) to reading out of GDB.
# chorizon is a pretty large amount of data. Let's save it as a flat file (2.1GB uncompressed)
#
# in principle, I think all tables could be pre-read out of the GDB and then joined to a
# SPC "skeleton" based off of the chorizon data.
#
# system.time(h <- sf::read_sf(dsn = dsn, layer = "chorizon",
#                              stringsAsFactors = FALSE, as_tibble = FALSE))
# #    user  system elapsed
# # 216.374   5.678 222.533
#
# data.table::fwrite(h, file = "~/geodata/gNATSGO/2020/horizons.csv")

# read the data in from flat file
# (TODO: consider SQLite storage of all tables and select * from chorizon)
starttime <- Sys.time()
gnatsgo <- data.table::fread(file = "~/geodata/gNATSGO/2020/horizons.csv")
print(Sys.time() - starttime)
# Time difference of 9.053402 secs

system.time(depths(gnatsgo) <- cokey ~ hzdept_r + hzdepb_r)
#   user  system elapsed
# 78.714   1.057  77.307

system.time(l <- get_legend_from_GDB(dsn, "areasymbol != ''"))
#  user  system elapsed
# 0.196   0.000   0.195

system.time(m <- get_mapunit_from_GDB(dsn, "muname != ''"))
#  user  system elapsed
# 6.555   0.077   6.926

system.time(co <- get_component_from_GDB(dsn = dsn, "compname != ''"))
#   user  system elapsed
# 35.453   0.384  36.051

# need chunking
# cgd <- soilDB:::.get_cogeomordesc_from_GDB(dsn, co = co)
# pmg <- soilDB:::.get_copmgrp_from_GDB(gdb.path, co = co)

system.time(site(gnatsgo) <- co)
#  user  system elapsed
# 4.334   0.004   3.248

system.time(site(gnatsgo) <- m)
#  user  system elapsed
# 3.247   0.000   2.700

system.time(site(gnatsgo) <- l)
#  user  system elapsed
# 2.838   0.000   2.499

microbenchmark::microbenchmark({ a <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1] },
                               { b <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1:2] },
                               { c <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1] },
                               { d <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1:2] },
                               times = 10)

# one or two [random] pedons; one or two horizons [first or first and second]
#
# Unit: milliseconds
#                                                        expr      min       lq     mean   median       uq      max neval cld
#   { a <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1] }   116.9621 126.7610 131.7065 135.5488 136.5518 139.0037    10   a
#   { b <- gnatsgo[round(runif(1, 1, length(gnatsgo))), 1:2] } 121.5565 134.4841 135.2277 137.6504 139.2550 141.0407    10   a
#   { c <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1] }   117.7989 137.6253 140.2133 138.9760 151.9303 156.8037    10  ab
#   { d <- gnatsgo[round(runif(2, 1, length(gnatsgo))), 1:2] } 132.5007 135.7209 149.1425 154.1004 155.4409 161.5209    10   b

# open-ended j indexing (generally a pretty slow operation)
microbenchmark::microbenchmark({ foo <- gnatsgo[, 1]; foo <- NULL },
                               { foo <- gnatsgo[, 2:3]; foo <- NULL },
                               times = 5)

# first horizon or second and third horizon, all profiles
# Unit: seconds
#                                     expr      min       lq     mean   median       uq      max neval cld
#    { foo <- gnatsgo[, 1]; foo <- NULL }   14.97619 15.46312 15.78757 16.09652 16.13384 16.26816     5   a
#    { foo <- gnatsgo[, 2:3]; foo <- NULL } 18.65325 18.68035 18.85315 18.83340 19.03189 19.06688     5   b
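The TODO above mentions SQLite storage of the flat tables. A minimal sketch of that idea, assuming the horizons.csv flat file from the previous step already exists (the database path and column selection are illustrative):

library(DBI)
library(RSQLite)

# write the chorizon flat file into a SQLite database once...
con <- dbConnect(RSQLite::SQLite(), path.expand("~/geodata/gNATSGO/2020/gnatsgo.sqlite"))
dbWriteTable(con, "chorizon",
             data.table::fread("~/geodata/gNATSGO/2020/horizons.csv"),
             overwrite = TRUE)

# ...then later sessions can pull just the rows/columns they need
hz <- dbGetQuery(con, "SELECT cokey, hzdept_r, hzdepb_r, claytotal_r FROM chorizon")
dbDisconnect(con)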

Compare "mixing" methods -- using a simple depth-weighted average / dominant hue approach and the powerful, but somewhat costly, method in aqp::aggregateColo

Fri, 07/24/2020 - 15:58
mixed-colors-mollic.R

library(aqp)
library(soilDB)

## aqp_mixed_colors
#
# The goal here is to compare dry and moist colors when numerically "mixed" over certain depth intervals.
# `aqp_mixed_colors` is vectorized over profiles -- performs color "mixing" by profile on interval [ztop, zbot]
#
# And to compare "mixing" methods -- using a simple depth-weighted average / dominant hue approach
# and the powerful, but computationally costly, method in aqp::aggregateColor

## Methods
## 1. depth-weighted average value and chroma + dominant hue in interval ("simple" aggregation)
## 2. conversion of Munsell -> RGB -> CIELAB2000 mix -> RGB -> Munsell (via aqp::aggregateColor)
#
# Returning results in a three part list reflecting the various methods and color data used.
#
aqp_mixed_colors <- function(p, ztop = 0, zbot = 18,
                             m_hue = "m_hue", m_value = "m_value", m_chroma = "m_chroma",
                             d_hue = "d_hue", d_value = "d_value", d_chroma = "d_chroma") {

  # trunc: new wrapper function around glomApply for constant top and bottom depth
  p.trc <- trunc(p, ztop, zbot)

  # calculate depth weights (horizon thickness)
  hzd <- horizonDepths(p)
  p.trc$wt <- p.trc[[hzd[2]]] - p.trc[[hzd[1]]]

  # iterate over profiles, calculating depth-weighted average value/chroma + dominant hues
  # frameify = TRUE means return data.frame result for _each profile_
  mixed <- profileApply(p.trc, frameify = TRUE, function(p.sub) {

    dwt <- aggregate(p.sub[["wt"]], by = list(p.sub[[d_hue]]), sum)
    mwt <- aggregate(p.sub[["wt"]], by = list(p.sub[[m_hue]]), sum)

    if (!nrow(dwt)) dwt <- data.frame(Group = NA, x = 1)
    if (!nrow(mwt)) mwt <- data.frame(Group = NA, x = 1)

    # construct result
    res <- data.frame(
      id = profile_id(p.sub),   # profile ID
      n_hz = nrow(p.sub),       # number of horizons
      dominant_d_hue = dwt[which(dwt$x == max(dwt$x, na.rm = TRUE))[1], 1],
      dominant_m_hue = mwt[which(mwt$x == max(mwt$x, na.rm = TRUE))[1], 1],
      wt_d_value = weighted.mean(p.sub[[d_value]], p.sub[["wt"]]),
      wt_m_value = weighted.mean(p.sub[[m_value]], p.sub[["wt"]]),
      wt_d_chroma = weighted.mean(p.sub[[d_chroma]], p.sub[["wt"]]),
      wt_m_chroma = weighted.mean(p.sub[[m_chroma]], p.sub[["wt"]]))

    # put idname into ID name slot in result
    names(res)[1] <- idname(p.sub)
    return(res)
  })

  ### calculate colors mixed in LAB space

  ## moist
  p.trc$m_lab <- aqp::munsell2rgb(p.trc[[m_hue]], p.trc[[m_value]], p.trc[[m_chroma]])
  m_res <- aqp::aggregateColor(p.trc, groups = idname(p.trc), k = 1, col = "m_lab")$aggregate.data
  m_res_missing <- which(!profile_id(p.trc) %in% m_res[[idname(p.trc)]])

  # deal with missing values
  if (length(m_res_missing)) {
    m_res.tmp <- m_res[NA, ][1:length(m_res_missing), ]
    m_res.tmp[[idname(p.trc)]] <- profile_id(p.trc)[m_res_missing]
    m_res <- rbind(m_res, m_res.tmp)
    m_res <- m_res[match(profile_id(p.trc), m_res[[idname(p.trc)]]), ] # reapply original order
  }

  ## dry
  p.trc$d_lab <- aqp::munsell2rgb(p.trc[[d_hue]], p.trc[[d_value]], p.trc[[d_chroma]])
  d_res <- aqp::aggregateColor(p.trc, groups = idname(p.trc), k = 1, col = "d_lab")$aggregate.data
  d_res_missing <- which(!profile_id(p.trc) %in% d_res[[idname(p.trc)]])

  # deal with missing values
  if (length(d_res_missing)) {
    # use the dry aggregate result as the template for empty rows
    d_res.tmp <- d_res[NA, ][1:length(d_res_missing), ]
    d_res.tmp[[idname(p.trc)]] <- profile_id(p.trc)[d_res_missing]
    d_res <- rbind(d_res, d_res.tmp)
    d_res <- d_res[match(profile_id(p.trc), d_res[[idname(p.trc)]]), ] # reapply original order
  }

  # construct final result
  res <- list()
  res$depth_weighted <- mixed
  res$m_aggregate <- m_res
  res$d_aggregate <- d_res
  return(res)
}

# a fake profile
dat0 <- data.frame(id = 1, top = 0, bottom = 50,
                   d_hue = "10YR", m_hue = "10YR",
                   d_value = 5, m_value = 3, d_chroma = 3, m_chroma = 3)

# a real profile
dat1 <- data.frame(id = 2,
                   top = c(0, 7, 27, 43), bottom = c(7, 27, 43, 59),
                   d_hue = c("10YR", "10YR", "10YR", "10YR"),
                   m_hue = c("10YR", "10YR", "10YR", "10YR"),
                   d_value = c(7, 4, 4, 5), m_value = c(6, 2.5, 2.5, 4),
                   d_chroma = c(2, 3, 3, 3), m_chroma = c(2, 2, 2, 3))

# same color data, shuffled depths a bit so more horizons in surface 0-18cm
dat2 <- data.frame(id = 3,
                   top = c(0, 5, 12, 16), bottom = c(5, 12, 16, 59),
                   d_hue = c("10YR", "10YR", "10YR", "10YR"),
                   m_hue = c("10YR", "10YR", "10YR", "10YR"),
                   d_value = c(7, 4, 4, 5), m_value = c(6, 2.5, 2.5, 4),
                   d_chroma = c(2, 3, 3, 3), m_chroma = c(2, 2, 2, 3))

dat <- rbind(dat0, dat1, dat2)
depths(dat) <- id ~ top + bottom

dat_test <- aqp_mixed_colors(dat)

data("loafercreek", package = "soilDB")
res <- aqp_mixed_colors(loafercreek)

#f <- fetchNASIS()
#res <- aqp_mixed_colors(f)

#' Construct a Munsell Hue Value/Chroma Code
#'
#' @param the_hue Character vector of hue values (in the Munsell notation e.g. "10YR")
#' @param the_value Numeric vector of value values
#' @param the_chroma Numeric vector of chroma values
#' @param digits Number of digits to round value and chroma off to (default: 0; integer values)
#'
#' @return A character vector of Munsell color codes in the format \code{HUE VALUE/CHROMA}
#' @export hvc_to_munsell
#'
#' @examples
#'
#' hvc_to_munsell("10YR", 2, 2)
#' # [1] "10YR 2/2"
#'
hvc_to_munsell <- function(the_hue, the_value, the_chroma, digits = 0, allow57chroma = FALSE) {
  chroma <- the_chroma
  if (!allow57chroma) {
    # chromas of 5 and 7 are not chips available in soil color book
    # so divide input chromas in two, round,
    # multiply by 2 to turn 5 -> [4 or 6] or 7 -> [6 or 8]
    idx <- which(round(chroma, digits) %in% c(5, 7))
    if (length(idx))
      chroma[idx] <- 2 * round(chroma[idx] / 2, digits)
  }

  res <- sprintf("%s %s/%s", the_hue, round(the_value, digits), round(chroma, digits))

  # allow for null chroma, convert other missing values to NA
  res[is.na(the_hue) | is.na(the_value)] <- NA
  return(res)
}

# this will round to nearest integer as needed
d_depthweight <- hvc_to_munsell(res$depth_weighted$dominant_d_hue,
                                res$depth_weighted$wt_d_value,
                                res$depth_weighted$wt_d_chroma)
m_depthweight <- hvc_to_munsell(res$depth_weighted$dominant_m_hue,
                                res$depth_weighted$wt_m_value,
                                res$depth_weighted$wt_m_chroma)

good.m.idx <- which(!grepl("NA", m_depthweight) & !is.na(m_depthweight))
good.d.idx <- which(!grepl("NA", d_depthweight) & !is.na(d_depthweight))

# compare depth-weighted dry versus moist -- just as a general sanity check
idx <- good.m.idx[good.m.idx %in% good.d.idx][5:10]
# idx <- c(7, 23, 70, 78, 96) # nice test IDs for loafercreek that demonstrate some things

test <- data.frame(id = res$depth_weighted$peiid[idx],
                   dry = d_depthweight[idx],
                   moist = m_depthweight[idx])

# the moist colors (m2) are darker, as expected
aqp::colorContrastPlot(test$dry, test$moist, labels = c("Dry", "Moist"))

# this is a known "easter egg" in the loafercreek dataset
# representing a fairly easy-to-commit data entry error (vs. 7.5YR)
# note that the test indices include one bogus "7.5R" hue in dry and has prominent contrast/high

# get the aggregated munsell chips from the 0-18cm interval
d_aqp <- hvc_to_munsell(res$d_aggregate$munsell.hue,
                        res$d_aggregate$munsell.value,
                        res$d_aggregate$munsell.chroma)
m_aqp <- hvc_to_munsell(res$m_aggregate$munsell.hue,
                        res$m_aggregate$munsell.value,
                        res$m_aggregate$munsell.chroma)

# compare depth-weighted dry versus aqp-mixed dry
moist_data <- data.frame(id = res$depth_weighted$peiid,
                         dwt = m_depthweight,
                         aqp = m_aqp,
                         ord = res$m_aggregate$munsell.sigma)
dry_data <- data.frame(id = res$depth_weighted$peiid,
                       dwt = d_depthweight,
                       aqp = d_aqp,
                       ord = res$d_aggregate$munsell.sigma)

# remove NA values (mostly from weighted.average missing values)
moist_data <- moist_data[good.m.idx, ]
dry_data <- dry_data[good.d.idx, ]

# order the data based on munsell.sigma
moist_data <- moist_data[order(moist_data$ord), ]
dry_data <- dry_data[order(dry_data$ord), ]

idx2 <- seq(1, nrow(moist_data), 8)
aqp::colorContrastPlot(moist_data$dwt[idx2], moist_data$aqp[idx2],
                       printMetrics = FALSE, col.cex = 0.0,
                       labels = c("nearest chip\ndepth-weighted mean V + C\nw/ dominant H",
                                  "aggregateColor"))
text(0.3, 0.42, "Comparison of 0-18cm Numerical Mixing Methods for the Mollic/Umbric Epipedon")

idx3 <- seq(1, nrow(dry_data), 8)
aqp::colorContrastPlot(dry_data$dwt[idx3], dry_data$aqp[idx3],
                       printMetrics = FALSE, col.cex = 0.0,
                       labels = c("nearest chip\ndepth-weighted mean V + C\nw/ dominant H",
                                  "aggregateColor"))
text(0.3, 0.42, "Comparison of 0-18cm Numerical Mixing Methods for the Mollic/Umbric Epipedon")

Example: A wrapper method (should be dispatched via S4) for munsell2rgb for the SoilProfileCollection object

Wed, 07/15/2020 - 22:29
munsell2rgb-SPC.R

# I just named this "munsell2rgb2"... but there is no reason why it can't be
# "munsell2rgb,SoilProfileCollection-method" in aqp
munsell2rgb2 <- function(object, hue = "hue", value = "value", chroma = "chroma", as.spc = TRUE) {

  h <- horizons(object)

  # makes a data.frame
  drgb <- munsell2rgb(h[[hue]], h[[value]], h[[chroma]], returnLAB = TRUE)

  idn <- idname(object)
  hidn <- hzidname(object)

  # munsell2rgb does not return ID names (not inherently aware of the SPC)
  idcol <- data.frame(h[[idn]], h[[hidn]])
  colnames(idcol) <- c(idn, hidn)

  if (as.spc) {
    # horizons<- will ensure merge.data.table triggers if @horizons is data.table
    horizons(object) <- cbind(idcol, drgb)
    return(object)
  } else {
    # TODO: remove :::
    return(aqp:::.as.data.frame.aqp(cbind(idcol, drgb), aqp_df_class(object)))
  }
}

library(aqp)
library(soilDB)
data(loafercreek)

# test with data.table
# loafercreek@horizons <- data.table::data.table(loafercreek@horizons)
# metadata(loafercreek)$aqp_df_class <- "data.table"

# test with tibble
# loafercreek@horizons <- tibble::as.tibble(loafercreek@horizons)
# metadata(loafercreek)$aqp_df_class <- "tbl_df"

# use SPC interface
res <- munsell2rgb2(loafercreek, hue = "m_hue", value = "m_value", chroma = "m_chroma")

# inspect (it's the class we set after join)
horizons(res)

# use data.frame-like interface (uses aqp_df_class)
res2 <- munsell2rgb2(loafercreek, hue = "m_hue", value = "m_value", chroma = "m_chroma", as.spc = FALSE)
res2
class(res2) # respects df class
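The comment above alludes to S4 dispatch. A minimal sketch of how the wrapper could be registered as a method, assuming munsell2rgb2 is defined as above; the generic name used here is hypothetical and not part of aqp:

# register a generic dispatching on the first argument, then an SPC method
# (illustrative generic name; aqp itself does not define this)
setGeneric("munsell2rgbSPC", function(object, ...) standardGeneric("munsell2rgbSPC"))

setMethod("munsell2rgbSPC", signature(object = "SoilProfileCollection"),
          function(object, hue = "hue", value = "value", chroma = "chroma", as.spc = TRUE) {
            munsell2rgb2(object, hue = hue, value = value, chroma = chroma, as.spc = as.spc)
          })

# usage is then identical to the plain wrapper function
res3 <- munsell2rgbSPC(loafercreek, hue = "m_hue", value = "m_value", chroma = "m_chroma")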

Inspired by @smroecker + https://soil.copernicus.org/articles/6/269/2020/

Tue, 07/14/2020 - 17:02
oblique_coordinates.R

# oblique geographic coordinate transform: project (x, y) onto an axis rotated by theta
ogc_fun <- function(x, y, theta) {
  sqrt(x^2 + y^2) * cos(theta - atan(y / x))
}

x <- y <- -100:100
plot(x, y, type = "n", xlim = c(0, 200), ylim = c(-150, 150))
for (n in 1:8)
  lines(ogc_fun(x, y, (2 * pi / n)), col = n, lty = 2)
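Beyond the demonstration plot, the same formula can generate oblique-coordinate covariates for point data, as in the linked paper's use of oblique geographic coordinates for spatial prediction. A minimal sketch, assuming a data frame pts with x/y coordinate columns; the number of angles and column names are illustrative:

# hypothetical sample points
pts <- data.frame(x = runif(10, 0, 100), y = runif(10, 0, 100))

# evaluate ogc_fun() at several rotation angles, one covariate column per angle
n.angles <- 6
thetas <- seq(0, pi, length.out = n.angles + 1)[-(n.angles + 1)]
ogc <- sapply(thetas, function(theta) ogc_fun(pts$x, pts$y, theta))
colnames(ogc) <- paste0("ogc_", round(thetas, 2))

# bind to the original coordinates for use as predictors
pts.ogc <- cbind(pts, ogc)
head(pts.ogc)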

Use aqp to handle missing data and typical pitfalls in field profile descriptions for I/O with the new R package mpspline2 by @obrl_soil

Sun, 07/12/2020 - 21:20
mpspline2-with-aqp.R

# making use of @obrl-soil's new mpspline2 package [soon to be on CRAN!]
#
# author: andrew g brown; 2020/07/12
#
# this is an attempt at creating a generic method for mass-preserving / equal-area splines
# on aqp SoilProfileCollection (SPC) data that may contain a large amount of missing values.
#
# it also provides an avenue to get 1cm continuous spline output back in a handy form (1cm sliced SPC).
#
# In the presence of missing values, getting the result back into the SPC is not always easy.
#
# The example code that follows should illustrate the major indexing pitfalls you need to
# be aware of.
#
# I have tried to cover corner cases so please let me know if you find a way to break it!

# get development versions of soil packages + dependencies
# remotes::install_github(c("ncss-tech/aqp",
#                           "ncss-tech/soilDB",
#                           "obrl-soil/mpspline2"))

# load packages
library(aqp)       # for SoilProfileCollection object + wrangling code
library(soilDB)    # sample data
library(mpspline2) # generic mass-preserving/equal area spline implementation

#' Missing-data-safe, SPC-wide wrapper around mpspline2::mpspline "continuous" 1cm output
#'
#' @name do_aqp_mpspline
#'
#' @description Facilitate safe use of just about any numeric SPC horizon attribute, from any SPC, with \code{mpspline2::mpspline}.
#'
#' This function will automatically filter profiles with \code{NA} in attribute of interest.
#'
#' This may be more conservative filtering than you expect, with intention of splines over a constant interpolation interval [with essentially no gaps].
#'
#' This is all with an eye toward aggregating many profile-level splines together where missing data is hard to reason over.
#'
#' Data completeness is assessed and the input SPC is filtered and truncated to create a container for the 1cm results from \code{mpspline2::mpspline}.
#'
#' @param object A SoilProfileCollection
#' @param var_name Column name in \code{@horizons} slot of \code{object} containing numeric values to spline
#' @param pattern Regex pattern to match for bottom of profile (passed to estimateSoilDepth) default: "R|Cr|Cd|qm"
#' @param hzdesgn Column name in \code{@horizons} slot of \code{object} containing horizon designations default: \code{aqp::guessHzDesgnName(object)}
#' @param ... Additional arguments to \code{mpspline2::mpspline}
#'
#' @author Andrew G. Brown
#'
#' @return A SoilProfileCollection with 1cm slices. Spline variables are in columns prefixed with "spline_" and RMSE/RMSE_IQR are in columns prefixed with "rmse_". If any profiles were removed from the collection, their profile IDs are stored in attr(result, 'removed').
#'
#' @export do_aqp_mpspline
#'
#' @examples
#'
#' library(aqp)
#' data(sp1)
#' depths(sp1) <- id ~ top + bottom
#'
#' res <- do_aqp_mpspline(sp1, "prop")
#' plotSPC(res[1:5,], color = "spline_prop", divide.hz = FALSE)
#'
do_aqp_mpspline <- function(object, var_name = NULL,
                            pattern = "R|Cr|Cd|qm",
                            hzdesgn = guessHzDesgnName(object), ...) {

  if (is.null(var_name) | !(var_name %in% horizonNames(object)))
    stop("argument `var_name` must specify a single horizon-level variable", call. = FALSE)

  hztop <- horizonDepths(object)[1]
  hzbot <- horizonDepths(object)[2]

  # glom to "available interval" in each profile
  # NOTE: we will handle warnings (profiles with no data at all) at end
  spc.sub <- suppressWarnings(glomApply(object, function(p) {
    i <- which(diff(c(0, cumsum(!is.na(p[[var_name]])))) == 1)
    h <- horizons(p)

    # basically this excludes NA values at top and bottom of profile
    # (O horizons, bedrock) but won't check missing values in between

    # need at least two horizons to make a spline
    if (length(i) < 2)
      return(c(0, 0))

    top_depth <- h[[hztop]][i[1]]
    bot_depth <- h[[hzbot]][i[length(i)]]
    return(c(top_depth, bot_depth))
  }))

  # debug: inspect horizon values for var_name
  #plot(spc.sub[1:10,], color = var_name)

  # only take profiles that have 100% data coverage in above interval
  # i.e. if a single horizon is missing data, remove whole profile
  spc.sub$nona <- profileApply(spc.sub, function(p) any(is.na(p[[var_name]])))
  spc.sub <- spc.sub[which(!spc.sub$nona), ]

  # calculate the deepest top depth and shallowest bottom depth
  mindepth <- max(profileApply(spc.sub, function(p) p[, 1][[hztop]]))
  maxdepth <- min(profileApply(spc.sub, estimateSoilDepth, p = pattern, name = hzdesgn))

  # we will only make interpolations that the "whole SPC supports"
  # the thought is that these 1cm slices will be further aggregated downstream
  spc.sub <- glomApply(spc.sub, function(p) c(mindepth, maxdepth), truncate = TRUE)

  # do the splines
  res <- mpspline2::mpspline(horizons(spc.sub)[c(idname(spc.sub), horizonDepths(spc.sub), var_name)],
                             var_name = var_name, ...)

  # concatenate results for re-insertion
  res2 <- do.call('c', lapply(profile_id(spc.sub), function(pid) {
    drange <- mindepth:maxdepth
    zero.idx <- drange == 0
    if (any(zero.idx))
      drange <- drange[-which(zero.idx)]

    # this returns the 1cm estimate which conforms with sliced spc
    return(res[[pid]]$est_1cm[drange])

    # # debug: prove that mass is preserved in output by returning block estimates
    # return(res[[pid]]$est_icm)
  }))

  # get the RMSE
  reserr <- do.call('c', lapply(profile_id(spc.sub), function(pid) {
    return(res[[pid]]$est_err)
  }))

  # make 1:1 with site
  reserr_iqr <- reserr[names(reserr) == "RMSE_IQR"]
  reserr <- reserr[names(reserr) == "RMSE"]

  # inspect
  #reserr_iqr
  #reserr

  # single horizon results cannot be splined, filter those out
  spc.sub <- filter(spc.sub, profile_id(spc.sub) %in% names(res))

  # adjustment for aqp::slice index logic versus glom interval logic
  if (mindepth == 0) {
    maxdepth <- maxdepth - 1
  }

  # create slices 1cm thick to insert spline result
  spc.spl <- aqp::slice(spc.sub, formula(sprintf("%s:%s ~ %s", mindepth, maxdepth, var_name)))

  # create new "spline_"+var_name variable
  spc.spl[[paste0("spline_", var_name)]] <- res2

  # create new "rmse_"+var_name as site level attributes
  spc.spl[[paste0("rmse_", var_name)]] <- reserr
  spc.spl[[paste0("rmse_iqr_", var_name)]] <- reserr_iqr

  # determine what profiles were removed
  removed <- profile_id(object)[!profile_id(object) %in% profile_id(spc.spl)]

  # add an attribute with removed profile IDs. there are three steps
  # that possibly remove data:
  #  - profiles removed by glomApply have no var_name data at all.
  #  - 100% coverage filtering step -- conservative filter to keep from making splines from bad data
  #  - mpspline itself will remove profiles with e.g. just one horizon
  attr(spc.spl, "removed") <- unique(removed)

  return(spc.spl)
}

###
### DEMO CODE
###

### make a combined comparison profile plot
### raw pedon data vs. 1cm-slice + mass preserving spline

# load sample dataset
data(loafercreek, package = "soilDB")

# set all O horizons to 10 percent clay and pH 5.5
# this isn't "necessary" but allows for interpolation to 0cm in loafercreek
loafercreek$clay[grep("O", loafercreek$hzname)] <- 10
loafercreek$phfield[grep("O", loafercreek$hzname)] <- 5.5

# use aqp wrapper function for SPC input to mpspline
res1 <- do_aqp_mpspline(loafercreek, "clay")

# NOTE: 7 profiles are dropped from loafercreek -> res
#       all of them contain no clay data at all
attr(res1, 'removed')

# inspect distribution of RMSE (one for each profile)
plot(density(res1$rmse_clay))
abline(v = quantile(res1$rmse_clay))

# set graphics parameters to minimize whitespace/leave room for legend
par(mar = c(1, 1, 3, 1))

# make sure the removed profiles deserved to be removed
plot(filter(loafercreek, profile_id(loafercreek) %in% attr(res1, 'removed')), color = "clay")

# NOTE: you can run this on anything numeric... but with big datasets
#       just a couple pedons missing data at a few depths will limit your range
res2 <- do_aqp_mpspline(loafercreek, "phfield")
plot(res2, color = "spline_phfield", n.legend = 5, divide.hz = FALSE, print.id = FALSE)
attr(res2, 'removed')

# more complete SPC gives broader range of depths
res3 <- do_aqp_mpspline(loafercreek[20:50], "phfield")
plot(res3[7:14,], color = "spline_phfield", n.legend = 5)
attr(res3, 'removed')

# take just a few profiles from the result for plot
res.sub <- res1[3:8,]

# get the original profiles
original <- filter(loafercreek, profile_id(loafercreek) %in% profile_id(res.sub))

# determine max depth in spline output (function of NA values)
max_d <- max(res.sub$hzdepb)

# use glom to truncate inputs to just interval matching spline output
orig.glom <- glomApply(original, function(p) {
  return(c(0, max_d))
}, truncate = TRUE)

# set new profile IDs for the spline'd profiles
profile_id(res.sub) <- paste0(profile_id(res.sub), "_mpspline")

# put spline results into variable for plotting
orig.glom$clay_combined <- orig.glom$clay
res.sub$clay_combined <- res.sub$spline_clay

# inspect
plot(res.sub[, 48])

# use aqp::union to merge multiple SPCs
spc.combined <- aqp::union(list(orig.glom, res.sub))

# avoid warnings about duplicate phiid
hzidname(spc.combined) <- "hzID"

# make the comparison plot
plotSPC(spc.combined, color = "clay_combined",
        plot.order = c(1, 7, 2, 8, 3, 9, 4, 10, 5, 11, 6, 12))

# different way of looking at things
dev.off()

# set up an xy plot
plot(y = spc.combined$hzdept, x = spc.combined$spline_clay, type = "n",
     xlab = "Total Clay Content (%)", ylab = "Depth (cm)",
     ylim = c(50, 0), xlim = c(10, 35))

# add lines for profiles #7 and #8
lapply(7:8, function(i) {
  lines(y = spc.combined[i,]$hzdept, x = spc.combined[i,]$clay, col = i, lwd = 2)
  lines(y = spc.combined[i,]$hzdept, x = spc.combined[i,]$spline_clay, col = i, lty = 2)
})

The following works as expected.

Fri, 06/19/2020 - 01:57
get_component_from_SDA_test-ES-subquery.R

## add support for ecosite-based queries in fetchSDA_Component / get_component_from_SDA;
## only uses NRCS forest/range sites; test for possible many:one duplication issues
## brownag committed on Jun 3, 2019
## https://github.com/ncss-tech/soilDB/commit/4ad25e7b628e854fe0d1d21bdd2484022acfa922

devtools::install_github('ncss-tech/soilDB@4ad25e7b628e854fe0d1d21bdd2484022acfa922',
                         dependencies = FALSE)

library(soilDB)

f <- get_component_from_SDA(WHERE = "areasymbol LIKE 'CA%'")

gower_tests.R

Wed, 06/17/2020 - 10:36
gower_tests.R

library(aqp)

data(sp2)
depths(sp2) <- id ~ top + bottom
site(sp2) <- ~ surface
hzdesgnname(sp2) <- "name"

# update soil colors that we have data for, for the plot
sp2$soil_color <- munsell2rgb(sp2$hue, sp2$value, sp2$chroma)

# calculate some site attributes
sp2$depth <- profileApply(sp2, estimateSoilDepth, p = "^C|2C|3C")
sp2$darkness <- profileApply(sp2, thompson.bell.darkness,
                             pattern = "^A|^2A", value = "value", chroma = "chroma")

# calculate a horizon attribute
sp2$redness <- hurst.redness(sp2$hue, sp2$value, sp2$chroma)

# default order
plot(sp2[1:10,], label = "surface")

# calculate gower distance on sliced spc (for hz attributes)
gdist <- gower_distance(slice(sp2[1:10,], 0:150 ~ redness + prop + field_ph),
                        c('darkness', 'redness', 'prop', 'field_ph'))

# ordered by quasi-gower distance
# convincing separation of parent materials/age
#
# use depth -- in this case this is pattern for ~solum thickness
# also, use thompson-bell profile darkness index (site-level)
# also, use hurst redness index (by horizon, traditionally for dry colors)
# finally, clay content from horizon (prop)
#
# we order with respect to the first profile, which was a Holocene surface
plot(sp2[1:10,], label = "surface", plot.order = order(gdist[1, ]), color = "prop")

# if you order with respect to a different profile, the relative order is different
plot(sp2[1:10,], label = "surface", plot.order = order(gdist[4, ]), color = "prop")

# if you order with respect to a different profile, the relative order is different
plot(sp2[1:10,], label = "surface", plot.order = order(gdist[2, ]), color = "prop")

library(soilDB)
data(loafercreek)
loafercreek <- rebuildSPC(loafercreek)

my.hz.vars <- c("clay", "phfield")

loaferclean <- filter(loafercreek,
                      checkHzDepthLogic(loafercreek)$valid,
                      evalMissingData(loafercreek, my.hz.vars) > 0.9)

spc <- loaferclean
spc$darkness <- profileApply(loaferclean, thompson.bell.darkness)
spc$redness <- hurst.redness(loaferclean$d_hue, loaferclean$d_value, loaferclean$d_chroma)

all.vars <- c("slope_field", "darkness", "redness")

gdist <- gower_distance(spc, all.vars)
dorder <- order(gdist[1, ])

plot(loaferclean, plot.order = dorder, color = "clay")

gower.R

Wed, 06/17/2020 - 10:28
gower.R

#' Calculate quasi-Gower's Distance for all profiles in a SoilProfileCollection
#'
#' @description This experimental function calculates pairwise distances between profiles in a SoilProfileCollection by a method related to Gower's Distance metric. One or more site or horizon level attributes are supplied as arguments for use in the distance calculation. Both numeric and categorical attributes are allowed -- "difference" values are calculated for numeric values, and "different" and "equal" categories are assigned \code{1} and \code{0} respectively.
#'
#' This is a SoilProfileCollection-specific implementation of the concept. The distance calculations specified in the method are first applied to horizon data, and then to the site data, to produce a "final" distance matrix.
#'
#' The pairwise distance of all horizons in the collection is reduced to a single value for each profile-level comparison. The horizon data distance matrix is scaled by both the range within individual variables (first), and the total thickness of each horizon (second).
#'
#' An aggregated horizon-to-site level value is added to a site-level distance calculation applied by the same method. Default weighting assigns all site-level attributes equal weight, which has a tendency to favor site-level attributes over the aggregate horizon attribute, so take care when mixing these two.
#'
#' @param spc A SoilProfileCollection
#' @param attr A character vector containing site or horizon attribute names to use in calculating distance.
#' @param sitewt Weight given to the site-level attribute distances relative to the aggregated horizon distance (default: 0.5)
#'
#' @return A numeric n x n distance matrix where n is \code{length(spc)}. May contain NaN.
#'
#' @author Andrew G. Brown
#'
#' @export gower_distance
#'
#' @examples
#'
#' library(aqp)
#'
#' data(sp2)
#'
#' depths(sp2) <- id ~ top + bottom
#' site(sp2) <- ~ surface
#' hzdesgnname(sp2) <- "name"
#'
#' # update soil colors that have data for plot
#' sp2$soil_color <- munsell2rgb(sp2$hue, sp2$value, sp2$chroma)
#'
#' # calculate some site attributes
#' sp2$depth <- profileApply(sp2, estimateSoilDepth, p = "^C|2C|3C")
#' sp2$darkness <- profileApply(sp2, thompson.bell.darkness, pattern = "^A|^2A",
#'                              value = "value", chroma = "chroma")
#'
#' # calculate a horizon attribute
#' sp2$redness <- hurst.redness(sp2$hue, sp2$value, sp2$chroma)
#'
#' # default order
#' plot(sp2[1:10,], label = "surface")
#'
#' # ordered by quasi-gower distance
#' # reasonably convincing separation of parent materials
#' # using depth -- in this case this is pattern for ~solum thickness
#' # also, thompson-bell profile darkness index (site-level)
#' # also, hurst redness index (by horizon, traditionally for dry colors)
#' # finally, texture and clay content from horizon
#'
#' plot(sp2[1:10,],
#'      label = "surface",
#'      plot.order = order(gower_distance(sp2[1:10,],
#'                                        c('depth','darkness','redness','texture',
#'                                          'prop'))[1, ]))
#'
#' ## another example with random data
#' spc <- do.call('rbind', lapply(letters[1:3], random_profile))
#' depths(spc) <- id ~ top + bottom
#'
#' ## make a numeric site attribute (something like slope gradient?)
#' spc$siteval1 <- profileApply(spc, function(x) {
#'   runif(1, 3, 65)
#' })
#'
#' ## make a horizon level category (something like structure grade?)
#' spc$catval1 <- letters[round(runif(nrow(spc), 5, 8))]
#'
#' ## all numeric, 1 site + 3 horizon attributes
#' gower_distance(spc, c("siteval1", "p1", "p2", "p3"))
#'
#' ## 1 site + 4 horizon, 1 horizon attribute categorical
#' gower_distance(spc, c("siteval1", "catval1", "p1", "p2", "p3"))
#'
#' ## without site attribute
#' gower_distance(spc, c("catval1", "p1", "p2", "p3"))
#'
#' ## just numeric horizon data
#' gower_distance(spc, c("p1", "p2", "p3"))
#'
gower_distance <- function(spc, attr, sitewt = 0.5) {

  h <- horizons(spc)
  s <- site(spc)
  hid <- h[[idname(spc)]]

  hzvars <- attr[attr %in% horizonNames(spc)]

  # calculate pairwise difference matrix for horizon attribute
  hdmat <- Reduce('+', lapply(hzvars, function(v) {
    var <- h[[v]]
    if (is.numeric(var)) {
      ov <- abs(outer(var, var, "-")) / diff(range(var, na.rm = TRUE))
    } else {
      ov <- outer(var, var, "!=")
    }
    return(ov)
  }))

  # perform horizon-distance weighting
  hdepths <- horizonDepths(spc)
  h$.marginDist <- colSums(hdmat, na.rm = TRUE) / (h[[hdepths[2]]] - h[[hdepths[1]]])

  # aggregate the horizon-level distances for all attributes to a single site var
  s$.hzDist <- as.numeric(aggregate(h$.marginDist, by = list(hid), sum, na.rm = TRUE)$x)

  # calculate pairwise difference matrix for aggregate horizon + site attributes
  svars <- attr[attr %in% siteNames(spc)]
  svarmat <- lapply(c('.hzDist', svars), function(v) {
    var <- s[[v]]
    if (is.numeric(var)) {
      ov <- abs(outer(var, var, "-")) / diff(range(var, na.rm = TRUE))
    } else {
      ov <- outer(var, var, "!=")
    }
    return(ov)
  })

  sdmat <- Reduce('+', svarmat[2:length(svarmat)])

  # if there were any site variables in the attr vector...
  if (length(sdmat) > 0) {
    sdmat <- (sdmat * sitewt) + (svarmat[[1]] * (1 - sitewt))
  } else {
    sdmat <- svarmat[[1]]
  }

  # the final result is an n x n distance metric
  return(sdmat)
}

rbind-fill-benchmark.R

Tue, 06/16/2020 - 06:53
rbind-fill-benchmark.R

library(microbenchmark)
library(data.table)
library(plyr)

# make some test data -- 200k records
# we will try rbind-filling two of these
test.data <- vector('list', length = 200)
test.data[] <- lapply(1:200, rnorm, n = 1000)
test.data <- as.data.frame(test.data, stringsAsFactors = FALSE)

# two sets of names
names1 <- as.character(1:200)
names2 <- as.character(201:400)

test2 <- test1 <- test.data
colnames(test1) <- names1
colnames(test2) <- names2

# compare what it "costs" for on-the-fly coercion to data.table
test1.dt <- as.data.table(test1)
test2.dt <- as.data.table(test2)

# 'bench it -- 100 reps
microbenchmark(
  # data.table::rbind.data.table
  "data.table" = rbind(test1.dt, test2.dt, fill = TRUE),

  # data.table::rbind.data.table + conversion of two data.frames
  "as.data.table" = rbind(as.data.table(test1), as.data.table(test2), fill = TRUE),

  # data.table::rbindlist
  "data.table::rbindlist" = rbindlist(list(test1.dt, test2.dt), fill = TRUE),

  # data.table::rbindlist + coercion
  "data.table::rbindlist + coercion" = rbindlist(list(test1, test2), fill = TRUE),

  # plyr::rbind.fill
  "plyr" = rbind.fill(test1, test2),
  times = 100
)

# Unit: milliseconds
#                              expr       min        lq      mean    median        uq       max neval
#                        data.table  5.401874  5.752023  7.052155  5.865167  6.045043  22.52242   100
#                     as.data.table  9.842593 10.114694 11.703388 10.313812 11.054234  29.69442   100
#             data.table::rbindlist  5.419195  5.691052  7.322580  5.866808  6.099170  24.41400   100
#  data.table::rbindlist + coercion  5.447201  5.662593  7.680818  5.867926  6.308414  23.63164   100
#                              plyr 49.178042 49.744839 56.202575 50.105359 50.856851 418.81422   100
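If downstream code expects a plain data.frame rather than a data.table, the fast rbindlist route still works. A minimal sketch, assuming test1 and test2 from above:

# rbindlist() fills missing columns with NA; setDF() converts back in place without a copy
filled <- data.table::rbindlist(list(test1, test2), fill = TRUE)
data.table::setDF(filled)
class(filled) # "data.frame"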

chunkApply -- new version of profileApply that scales linearly with tens of thousands of profiles

Sun, 01/19/2020 - 02:42
chunkApply_demo.R

chunkApply <- function(object, FUN, simplify = TRUE, frameify = FALSE,
                       chunk.size = 100, column.names = NULL, ...) {

  if (simplify & frameify) {
    message("simplify and frameify are both TRUE -- ignoring simplify argument")
    simplify <- FALSE
  }

  # break SPC of size n into chunk.size chunks
  n <- length(object)
  chunk <- sort(1:n %% max(1, round(n / chunk.size))) + 1

  # operate on chunks
  res <- do.call('c', lapply(split(1:n, chunk), function(idx) {
    l <- lapply(as.list(idx), function(i) do.call(FUN, list(object[i, ], ...)))
    if (simplify)
      return(unlist(l))
    return(l)
  }))

  # return profile IDs if it makes sense for result
  if (length(res) == length(object)) {
    id.name <- idname(object)
    names(res) <- profile_id(object)
  }

  # return horizon IDs if it makes sense for result
  if (length(res) == nrow(object)) {
    id.name <- hzidname(object)
    names(res) <- hzID(object)
  }

  if (frameify) {
    if (is.data.frame(res[[1]])) {
      res <- do.call('rbind', res)
      if (nrow(res) == nrow(object)) {
        res <- as.data.frame(cbind(denormalize(object, idname(object)), hzID(object), res),
                             row.names = NULL)
        oldnames <- colnames(res)
        colnames(res) <- c(idname(object), hzidname(object), oldnames[3:length(oldnames)])
      } else if (nrow(res) == length(object)) {
        res <- as.data.frame(cbind(denormalize(object, idname(object)), res),
                             row.names = NULL)
        oldnames <- colnames(res)
        colnames(res) <- c(idname(object), oldnames[2:length(oldnames)])
      } else {
        res <- as.data.frame(res)
      }
      if (!is.null(column.names))
        colnames(res) <- column.names
    } else {
      message("first result is not class `data.frame` and frameify is TRUE. defaulting to list output.")
    }
  }

  return(res)
}

library(aqp)

foo <- do.call('rbind', lapply(as.list(1:10000), random_profile))
depths(foo) <- id ~ top + bottom

simpleFunction <- function(p) {
  hz <- horizons(p)
  (hz[, 3] - hz[, 2])[1]
  #res <- data.frame(profile_id(p), hzID(p), thk = (hz[, 3] - hz[, 2]))
  #colnames(res) <- c(idname(p), hzidname(p), 'hz_thickness')
  #return(res)
}

# NOTE: the parallel = TRUE calls below assume a chunkApply variant with parallel support
c1 <- system.time(goo <- chunkApply(foo[1:100,], simpleFunction, simplify = T))
cp1 <- system.time(chunkApply(foo[1:100,], simpleFunction, parallel = T))
p1 <- system.time(profileApply(foo[1:100,], simpleFunction, simplify = FALSE))

c2 <- system.time(chunkApply(foo[1:1000,], simpleFunction))
cp2 <- system.time(chunkApply(foo[1:1000,], simpleFunction, parallel = T))
p2 <- system.time(profileApply(foo[1:1000,], simpleFunction, simplify = FALSE))

c3 <- system.time(chunkApply(foo[1:10000,], simpleFunction))
cp3 <- system.time(chunkApply(foo[1:10000,], simpleFunction, parallel = T))

c4 <- system.time(chunkApply(foo, simpleFunction))
cp4 <- system.time(chunkApply(foo, simpleFunction, parallel = T))

# too dang long
# p3 <- system.time(profileApply(foo, simpleFunction, simplify = FALSE))

c1
cp1
p1
c2
cp2
p2
c3
cp3
p3

library(aqp)
foo <- do.call('rbind', lapply(as.list(1:100000), random_profile))
depths(foo) <- id ~ top + bottom

simpleFunction <- function(p) data.frame(horizons(p)[2, 2:3])

idx <- c(seq(100, 900, 100), seq(1000, 10000, 1000), seq(10000, 100000, 10000))
idx.sub <- idx[idx <= 10000]

res <- do.call('rbind', lapply(as.list(idx.sub), function(i) {
  system.time(profileApply(foo[1:i,], simpleFunction, simplify = F))
}))

res2 <- do.call('rbind', lapply(as.list(idx.sub), function(i) {
  system.time(chunkApply(foo[1:i,], simpleFunction))
}))

plot(res[, 3] ~ idx.sub, type = "l", lwd = 2,
     main = "Time to *Apply n Profiles",
     xlab = "Number of Profiles", ylab = "Time, seconds")
lines(res2[, 3] ~ idx.sub, col = "GREEN", lwd = 2)
legend('topleft', legend = c("profileApply", "chunkApply"),
       lty = 1, lwd = 2, col = c("BLACK", "GREEN"))

Fetching SSURGO data for arbitrarily large (multi-SSA/multi-county) extents with the FedData package (example: Central Sierra Nevada, California)

Fri, 03/08/2019 - 18:12
FedData_Demo.R

## Example: Fetch SSURGO data for arbitrarily large/complex areas with the FedData package
#@author: Andrew Brown

# sf package for USAboundaries
library(sf)

# USAboundaries for getting county geometry
library(USAboundaries)

# FedData for routines for accessing/stitching multi-SSA SSURGO data
library(FedData)
library(magrittr)

# rgdal for readOGR()/writeOGR() used below
library(rgdal)

# label for your files
project.label <- "CentralSierra"
state <- "California"
county.fips <- c("009", "109", "003", "005")

# use USAboundaries package to get all counties in california
contemporary <- us_counties(states = c(state))

# select just polygons for fips codes specified
project.template <- contemporary[contemporary$countyfp %in% county.fips, ]

# merge into a single polygon
project.template$grpid <- 1
project.template <- st_union(project.template)

## OR you can load your data from your own shapefile with rgdal
#your.template <- readOGR(dsn = "./path/to/shapefile/", layer = "filenamenoextension")

# inspect template
plot(st_geometry(project.template))

# get_ssurgo downloads the requisite SSAs to the folder ./RAW/SSURGO/
# then it extracts spatial and tabular data (.SHP & .CSV) into ./EXTRACTIONS/
your.ssurgo <- get_ssurgo(template = as(project.template, 'Spatial'),
                          label = project.label,
                          force.redo = TRUE)

# create unique ID and ensure correct datatype for MUKEY
mupoly <- readOGR(dsn = paste0("./EXTRACTIONS/", project.label, "/SSURGO"),
                  layer = paste0(project.label, "_SSURGO_Mapunits"),
                  stringsAsFactors = FALSE)
mupoly$OBJECTID <- 1:nrow(mupoly)
mupoly$MUKEY <- as.double(mupoly$MUKEY)
writeOGR(mupoly,
         dsn = paste0("./EXTRACTIONS/", project.label, "/SSURGO"),
         layer = paste0(project.label, "_SSURGO_Mapunits"),
         driver = "ESRI Shapefile", overwrite_layer = TRUE)

# optional: save Rdata representation of get_ssurgo() result for further processing in R
save(your.ssurgo, file = paste0(project.label, ".Rda"))

Create a GeoPackage containing the muaggatt table and spatial data from SSURGO (Tuolumne County)

Fri, 03/08/2019 - 04:21
ca109_geopackage.R

library(sf)
library(RSQLite)
library(USAboundaries)
library(FedData)
library(sp)  # spTransform()/proj4string() used below, in case FedData does not attach sp

label <- "ca109"
raw.dir <- "RAW/SSURGO"
extraction.dir <- "EXTRACTIONS"
output.file <- "ca109.gpkg"

contemporary <- us_counties(states = c("California"))
tuolumne <- contemporary[which(contemporary$countyfp == "109"), ]
plot(st_geometry(tuolumne))

# if you need the source data/don't know the necessary SSURGOAreas use get_ssurgo()
#ca109 <- get_ssurgo(template = as(tuolumne, 'Spatial'), label = "ca109", force.redo = TRUE)

# get spatial data out of existing ZIP files downloaded from WSS
template <- as(tuolumne, "Spatial")
template.poly <- polygon_from_extent(template)

# skip the process of SDA query to determine necessary SSAs
SSURGOAreas <- data.frame(areasymbol = c("CA731", "CA630", "CA790", "CA729", "CA649", "CA740"),
                          saverest = c("09/12/2018", "09/17/2018", "09/13/2018",
                                       "09/12/2018", "09/14/2018", "09/12/2018"))

# extract (download if needed)
SSURGOData <- lapply(1:nrow(SSURGOAreas), function(i) {
  message("Loading SSURGO data for survey area ", i, " of ", nrow(SSURGOAreas),
          ": ", as.character(SSURGOAreas$areasymbol[i]))
  get_ssurgo_study_area(template = template.poly,
                        area = as.character(SSURGOAreas$areasymbol[i]),
                        date = as.Date(SSURGOAreas$saverest[i], format = "%m/%d/%Y"),
                        raw.dir = "./RAW/SSURGO/")
})

SSURGOPolys <- lapply(SSURGOData, "[[", "spatial")

message("Merging all SSURGO Map Unit polygons")
SSURGOPolys <- do.call("rbind", SSURGOPolys)

message("Cropping all SSURGO Map Unit polygons to template")
SSURGOPolys <- raster::crop(SSURGOPolys, y = spTransform(template, proj4string(SSURGOPolys)))

# inspect
#plot(template)
#plot(template.poly, add = T)
#plot(SSURGOPolys, col = "RED", add = T)

# get tabular data
SSURGOTables <- lapply(SSURGOData, "[[", "tabular")

message("Merging all SSURGO data tables")
tableNames <- unique(unlist(sapply(SSURGOTables, names)))
tableNames <- tableNames[order(tableNames)]

SSURGOTables <- lapply(tableNames, function(name) {
  tables <- lapply(SSURGOTables, "[[", name)
  tables <- do.call("rbind", tables)
  tables <- unique(tables)
  return(tables)
})
names(SSURGOTables) <- tableNames

SSURGOTables <- extract_ssurgo_data(tables = SSURGOTables,
                                    mapunits = as.character(unique(SSURGOPolys$MUKEY)))

suppressWarnings(rgdal::writeOGR(SSURGOPolys,
                                 dsn = normalizePath(paste0(extraction.dir, "/.")),
                                 layer = paste0(label, "_SSURGO_Mapunits"),
                                 driver = "ESRI Shapefile", overwrite_layer = TRUE))

junk <- lapply(names(SSURGOTables), function(tab) {
  readr::write_csv(SSURGOTables[[tab]],
                   path = paste(extraction.dir, "/", label, "_SSURGO_", tab, ".csv", sep = ""))
})

# create unique feature ID
SSURGOPolys$fid <- 1:nrow(SSURGOPolys)

# produce get_ssurgo-like list output
ca109 <- list(spatial = SSURGOPolys, tabular = SSURGOTables)

# define geometry of geopkg, and write mu polygons to file
write_sf(st_as_sf(ca109$spatial), output.file, layer = 'mu_poly')

# inspect
st_layers("ca109.gpkg")

agg.idx <- which(names(ca109$tabular) == "muaggatt")
ca109$tabular[[agg.idx]]$mukeychr <- as.character(ca109$tabular[[agg.idx]]$mukey)

# connect to write sqlite tables (just muaggatt #56)
con <- dbConnect(RSQLite::SQLite(), dbname = "ca109.gpkg")
dbListObjects(con)
dbCreateTable(con, name = "muaggatt", fields = ca109$tabular[[agg.idx]])
dbWriteTable(con, name = "muaggatt", value = ca109$tabular[[agg.idx]], overwrite = TRUE)
dbDisconnect(con)