WRTDS Tutorial

The WRTDS1 model (Hirsch et al. 2010) is implimented in R using the EGRET package (Hirsch and DeCicco, 2015). It’s meant to be paired with data extraction from the USGS National Water Information System (NWIS), accessed via the dataRetrieval package (same authors). For data outside the US, we need to standardize the inputs to satisfy the model. The following is a tutorial for performing the model on water quality data in Canada.

1 Weighted Regressions Against Time Discharge and Seasons

Load packages

Below is a custom function that, when given a package name in the form of a text string, will check if it’s installed, install it, and then load into the library. Notice the exclamation mark in front of require: if the package is not installed, then it will install.

For this tutorial, we need the EGRET and dataRetrieval packages. R is generally case sensitive, so pay special care to the spelling.

loadpackages=function(packages){
  for(p in packages){
  if(!require(p,character.only=T,quietly=T)){install.packages(p)}
  library(p,character.only=T,quietly=T)  }
}
#loadpackages(c("EGRET","dataRetrieval","ggplot2","tidyhydat"))

Examine Data

Explore some sample data from the Bow River at Cochrane (upstream of the city of Calgary).

Note that we need to standardize the dates with a format string “%d-%b-%Y %H:%M”.

There appears to be a slight upwards trend in the data. We’ll explore this with WRTDS.

loadpackages("ggplot2")
setwd("C:/Users/Tyler/OneDrive/Software/R_Projects/workshops/2020_forWater_Rworkshop")
BowWQ=read.csv(file = "input/AB05BH0010_BowRiver_WQ.csv")
BowWQ$SampleDate=as.Date(BowWQ$SampleDate,format="%d-%b-%Y %H:%M")
ggplot(data=BowWQ,aes(x=SampleDate,y=as.numeric(Nitrate_NO3asN_mgL)))+
  geom_point()+ylab(expression(paste("Nitrate (",NO[3],NA^-NA," as mg N ",L^-1,")")))+
  ggtitle("Bow River at Cochrane, Alberta")

Download Hydrology Data

We’ll use the tidyhydat package to access Water Survey of Canada data for the Bow River. If this is your first use of the package, you’ll have to run download_hydat(). The function hy_stations allows us to sample sites, here we specify the province of Alberta. We can then subset these sites for the name and approximate latitude. It appears the Bow River at Cochrane should be a suitable gage!

loadpackages("tidyhydat")
#download_hydat()
ABstat=tidyhydat::hy_stations(prov_terr_state_loc = "AB")
BowWQ$Latitude[1]
## [1] 51.18306
ABstat[grepl("BOW RIVER",ABstat$STATION_NAME) & ABstat$LATITUDE>51 & ABstat$HYD_STATUS=="ACTIVE",]

Next, we’ll download the data for station 05BH004. A little data cleaning is necessary, to add all the dates of the study period.

BowCal=(as.data.frame(hy_daily_flows(station_number = "05BH004")))
BowCal$Date=as.Date(BowCal$Date)
head(BowCal)
length=as.numeric(max(BowCal$Date)-min(BowCal$Date))
BowCal=rbind(BowCal,
               do.call("rbind",lapply(1:length,function(d){
                 t=BowCal[1,]
                 t$Date=as.Date(BowCal$Date[1])+d-1
                 t$Value=NA
                 t$Symbol=NA
                 return(t)
               }))
)
BowCal=BowCal[order(BowCal$Date),]
BowCal=BowCal[!duplicated(BowCal$Date),]

Clean Data

BowWQ=BowWQ[BowWQ$SampleDate%in%BowCal$Date & BowWQ$SampleDate>="1998-09-15",]
BowWQ=BowWQ[!is.na(as.numeric(BowWQ$Nitrate_NO3asN_mgL)),]
BowCal=BowCal[BowCal$Date>=BowWQ$SampleDate[1] & BowCal$Date<=BowWQ$SampleDate[nrow(BowWQ)],]
wrtds.input=function(Sample.in,Sample.colnames,Daily.in,Daily.colnames){
  Sample=data.frame(Date=Sample.in[,Sample.colnames[1]])
  for(col in c("ConcLow","ConcHigh","Uncen","ConcAve","Julian","Month","Day",
             "DecYear","MonthSeq","waterYear","SinDY","CosDY")){Sample[,col]=NA}
  for(col in c("ConcLow","ConcHigh","ConcAve")){
    Sample[,col]=as.numeric(as.character(Sample.in[,Sample.colnames[2]]))}
  Sample$Uncen=1
  
  Daily=data.frame(Date=Daily.in[,Daily.colnames[1]])
  for(col in c("Q","Julian","Month","Day","DecYear","MonthSeq","waterYear",
             "Qualifier","i","LogQ","Q7","Q30")){Daily[,col]=NA}
  Daily$Q=Daily.in[,Daily.colnames[2]]
  Daily$LogQ=log(Daily$Q)
  Daily$i=1:nrow(Daily)
  Daily$Qualifier="A"
  
  leap=function(years){
    as.logical(ifelse((years%%4)==0,
                      ifelse((years%%100)==0,
                             ifelse((years%%400)==0,1,0),1),0))
  }
  for(d in c("Sample","Daily")){
    D=get(d)
    D$Date=as.character(D$Date)
    D$Month=as.POSIXlt(D$Date)$mo+1
    D$Day=as.POSIXlt(D$Date)$yday+1
    years=as.POSIXlt(D$Date)$year+1900
    D$DecYear=years+D$Day/ifelse(leap(years),366,365)
    D$waterYear=ifelse(D$Month<10,floor(D$DecYear),floor(D$DecYear)+1)
    D$Julian=as.numeric(as.Date(D$Date)-as.Date("1900-01-01"))
    D$MonthSeq=(floor(D$DecYear)-1900)*12+D$Month-1
    eval(call("<-",as.name(d),D))
  }
  Sample$SinDY=sin(pi*2*Sample$Day/365)
  Sample$CosDY=cos(pi*2*Sample$Day/365)
  
  INFO=data.frame("agency_cd"=NA)
  for(col in c("agency_cd","site_no","station_nm",
               "site_tp_cd","lat_va","long_va",
               "dec_lat_va","dec_long_va","coord_meth_cd",
               "coord_acy_cd","coord_datum_cd","dec_coord_datum_cd",
               "district_cd","state_cd","county_cd",
               "country_cd","land_net_ds","map_nm",
               "map_scale_fc","alt_va","alt_meth_cd",
               "alt_acy_va","alt_datum_cd","huc_cd",
               "basin_cd","topo_cd","instruments_cd",
               "construction_dt","inventory_dt","drain_area_va",
               "contrib_drain_area_va","tz_cd","local_time_fg",
               "reliability_cd","gw_file_cd","nat_aqfr_cd",
               "aqfr_cd","aqfr_type_cd","well_depth_va",
               "hole_depth_va","depth_src_cd","project_no",
               "drainSqKm","shortName","staAbbrev",
               "param.nm","param.units","paramShortName",
               "paramNumber","constitAbbrev","paStart",
               "paLong")){INFO[,col]=NA}
  eList <- mergeReport(INFO, Daily, Sample)
  return(eList)
}
loadpackages("EGRET")
eList=wrtds.input(
  BowWQ,c("SampleDate","Nitrate_NO3asN_mgL"),
  BowCal,c("Date","Value"))
## 
##  Discharge Record is 7029 days long, which is 19 years
##  First day of the discharge record is 1998-09-15 and last day is 2017-12-12
##  The water quality record has 224 samples
##  The first sample is from 1998-09-15 and the last sample is from 2017-12-12
##  Discharge: Minimum, mean and maximum 27.9 86.5 1750
##  Concentration: Minimum, mean and maximum 0.007 0.12 3.6
##  Percentage of the sample values that are censored is 0 %

Evaluate Data Retrieval

Let’s first confirm that the function we wrote produced what we expected. For Phosphorus we can explore the 3 objects we fed to mergeReport: INFO, Daily, and Sample. By using the function typeof, we see that eList00665 is a list-type object, and its names are INFO, Daily, Sample, and surfaces. surfaces is an empty space where the WRTDS results will eventually be written. In R, to index over a named division, we use the money sign (“$”) to get a subset of an object.

typeof(eList)
## [1] "list"
names(eList)
## [1] "INFO"     "Daily"    "Sample"   "surfaces"
head(eList$Sample)
head(eList$Daily)
nrow(eList$Sample)
## [1] 224
nrow(eList$Daily)
## [1] 7029

We see that INFO recorded the information we saw above. Sample has columns specifying chemical concentrations, including ConcAve, and Daily has a column Q for daily discharge. Sample has 1082 rows, and Daily has 14981 rows. This meets our expectations of the data availability for this site.

Making Plots

Let’s first practice making some simple plots.

We can plot the timeseries of discharge from the Patuxent River. The base plot function has inputs x and y for the x and y variables, and several other options for which we can specify text strings to label the plot axes and title. type=“l” specifies a line plot as opposed to type=“p” for points. All arguments are separated by commas.

ggplot(data=eList$Daily,aes(x=as.Date(Date),y=Q))+geom_line()+
  ylab("Daily Discharge (cfs)")+ggtitle("Bow River at Cochrane, Alberta")+
  theme(axis.title.x = element_blank())

Let’s plot the CQ relationship for nitrate:

ggplot(data=eList$Sample,aes(x=Q,y=ConcAve))+geom_point()+
  ylab("Nitrate (mg/L)")+xlab("Daily Discharge (cfs)")+
  ggtitle("Bow River at Cochrane, Alberta")

eList <- modelEstimation(eList)
## 
##  first step running estCrossVal may take about 1 minute
##  estCrossVal % complete:
## 0    1   2   3   4   5   6   7   8   9   10  
## 11   12  13  14  15  16  17  18  19  20  
## 21   22  23  24  25  26  27  28  29  30  
## 31   32  33  34  35  36  37  38  39  40  
## 41   42  43  44  45  46  47  48  49  50  
## 51   52  53  54  55  56  57  58  59  60  
## 61   62  63  64  65  66  67  68  69  70  
## 71   72  73  74  75  76  77  78  79  80  
## 81   82  83  84  85  86  87  88  89  90  
## 91   92  93  94  95  96  97  98  99  
## Next step running  estSurfaces with survival regression:
## Survival regression (% complete):
## 0    1   2   3   4   5   6   7   8   9   10  
## 11   12  13  14  15  16  17  18  19  20  
## 21   22  23  24  25  26  27  28  29  30  
## 31   32  33  34  35  36  37  38  39  40  
## 41   42  43  44  45  46  47  48  49  50  
## 51   52  53  54  55  56  57  58  59  60  
## 61   62  63  64  65  66  67  68  69  70  
## 71   72  73  74  75  76  77  78  79  80  
## 81   82  83  84  85  86  87  88  89  90  
## 91   92  93  94  95  96  97  98  99  
## Survival regression: Done
BowNO3year=aggregate(eList$Daily,by=list(floor(eList$Daily$DecYear)),FUN="mean")
ggplot()+
  geom_line(data=BowNO3year,aes(x=DecYear,y=FNConc),col=2,lwd=2)+
  geom_line(data=eList$Daily,aes(x=DecYear,y=ConcDay))+
  geom_point(data=eList$Sample,aes(x=DecYear,y=ConcAve))+ylim(0,0.5)+
  ggtitle("Nitrate in the Bow River at Cochrane",subtitle = "WRTDS Model 1999-2019")+
  ylab("Concentration (mg/L)")+theme(axis.title.x = element_blank())