Make Network File

In this notebook, we compile the seismic network metadata that is used recurrently throughout the workflow. We also scan the entire preprocessed data set to measure the daily data availability, which is essential for estimating the detection capability of the network on any given day.

[1]:
import fnmatch
import glob
import pandas as pd
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import obspy as obs

from BPMF.config import cfg
from BPMF.dataset import Network
from matplotlib.ticker import FixedLocator, FormatStrFormatter
[2]:
NETWORK_FILENAME = "network.csv"
AVAILABILITY_FILENAME = "availability.csv"
preproc_folder_name = f"preprocessed_{cfg.MIN_FREQ_HZ:.0f}_{cfg.MAX_FREQ_HZ:.0f}"
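
The preprocessed waveforms are expected in a daily folder whose name encodes the preprocessing frequency band. As a quick illustration (min_freq_hz and max_freq_hz below are hypothetical values, not necessarily those of your configuration), the folder name evaluates to:

# hypothetical values for the corner frequencies of the preprocessing band
min_freq_hz, max_freq_hz = 2.0, 12.0
print(f"preprocessed_{min_freq_hz:.0f}_{max_freq_hz:.0f}")  # -> preprocessed_2_12
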
[3]:
# we check the station metadata and data availability between START_DATE and END_DATE
# these also define the start and end of the experiment
START_DATE = "2012-07-26"
END_DATE = "2012-07-26"
datelist = pd.date_range(start=START_DATE, end=END_DATE)
datelist
[3]:
DatetimeIndex(['2012-07-26'], dtype='datetime64[ns]', freq='D')
[4]:
# initialize data frames
daily_availability = pd.DataFrame()
network_metadata = pd.DataFrame(
    columns=["network_code", "station_code", "longitude", "latitude", "elevation_m"]
)
[5]:
# even though this tutorial analyzes a single day, this notebook is written to handle
# an extended database that follows the folder tree convention used here
for date in datelist:
    row_name = date.strftime("%Y-%m-%d")
    data_folder = os.path.join(
        cfg.INPUT_PATH, str(date.year), date.strftime("%Y%m%d"), preproc_folder_name
    )
    resp_folder = os.path.join(
        cfg.INPUT_PATH, str(date.year), date.strftime("%Y%m%d"), "resp"
    )
    data_filenames = glob.glob(os.path.join(data_folder, "*mseed"))
    daily_network_metadata = pd.DataFrame(
        columns=["network_code", "station_code", "longitude", "latitude", "elevation_m"]
    )
    for fname in data_filenames:
        # we are only interested in the filename, not the entire path
        fname = os.path.basename(fname)
        # the filename contains information on the channel id
        net_code, sta_code, loc_code, cha_code, ext = fname.split(".")
        cha_code = cha_code[: cha_code.find("_")]
        # print(net_code, sta_code, loc_code, cha_code)
        daily_network_metadata.loc[
            f"{net_code}.{sta_code}", ["network_code", "station_code"]
        ] = [net_code, sta_code]

    for sta_id in daily_network_metadata.index:
        # count the number of channels associated with sta_id
        channels = fnmatch.filter(data_filenames, f"*{sta_id}.*mseed")
        daily_availability.loc[row_name, sta_id] = len(channels)
        if sta_id not in network_metadata.index:
            station_inv = obs.read_inventory(
                os.path.join(resp_folder, f"{sta_id}.xml")
            )[0][0]
            daily_network_metadata.loc[
                sta_id, ["longitude", "latitude", "elevation_m"]
            ] = [station_inv.longitude, station_inv.latitude, station_inv.elevation]
    # only append stations that are not already in network_metadata, so that looping
    # over several days does not create duplicated or partially empty rows
    new_stations = daily_network_metadata.index.difference(network_metadata.index)
    network_metadata = pd.concat([network_metadata, daily_network_metadata.loc[new_stations]])

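The loop above assumes that the preprocessed traces follow a network.station.location.channel_suffix.mseed naming pattern. As a minimal sketch (the filename below is hypothetical; only its structure matters), this is how the channel id is parsed:

# hypothetical example filename following the net.sta.loc.cha_suffix.mseed convention
example_fname = "YH.SPNC.00.HHZ_example.mseed"
net_code, sta_code, loc_code, cha_code, ext = example_fname.split(".")
cha_code = cha_code[: cha_code.find("_")]  # strip the suffix appended at preprocessing
print(net_code, sta_code, loc_code, cha_code)  # -> YH SPNC 00 HHZ
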
[6]:
network_metadata
[6]:
network_code station_code longitude latitude elevation_m
YH.DD06 YH DD06 30.31777 40.623539 182.0
YH.SAUV YH SAUV 30.3272 40.7402 170.0
YH.DC06 YH DC06 30.265751 40.616718 555.0
YH.DC08 YH DC08 30.25013 40.744438 162.0
YH.SPNC YH SPNC 30.3083 40.686001 190.0
YH.DE08 YH DE08 30.406469 40.748562 31.0
YH.DC07 YH DC07 30.24217 40.66708 164.0
YH.DE07 YH DE07 30.411539 40.679661 40.0
[7]:
daily_availability
[7]:
YH.DD06 YH.SAUV YH.DC06 YH.DC08 YH.SPNC YH.DE08 YH.DC07 YH.DE07
2012-07-26 3.0 3.0 3.0 3.0 3.0 3.0 3.0 3.0
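
As a quick illustration of how this table can be used later on (a sketch, not part of the BPMF API), one may for instance list the stations that recorded all three components on a given day:

# hypothetical usage sketch: stations with all three components available on 2012-07-26
day = "2012-07-26"
fully_available = daily_availability.columns[daily_availability.loc[day] == 3.0]
print(list(fully_available))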

Save the network metadata and data availability

[8]:
network_metadata.index.name = "station_id"
print(os.path.join(cfg.NETWORK_PATH, NETWORK_FILENAME))
network_metadata.to_csv(os.path.join(cfg.NETWORK_PATH, NETWORK_FILENAME), sep="\t")
# add two header lines
with open(os.path.join(cfg.NETWORK_PATH, NETWORK_FILENAME), "r+") as fnet:
    content = fnet.read()
    # move pointer to beginning of file
    fnet.seek(0, 0)
    # write the header lines at the beginning of the file
    fnet.write(f"{START_DATE}\t{END_DATE}\n")
    # write the name of the components used on each station
    # note: the list of components will be used to broadcast
    # network waveforms into a single numpy.ndarray, so even
    # if some stations only have one component we need to
    # fill their missing components with zeros in order to
    # keep consistent data dimensions across stations
    fnet.write("N\tE\tZ\n")
    fnet.write(content)
daily_availability.to_csv(os.path.join(cfg.NETWORK_PATH, AVAILABILITY_FILENAME))
../network/network.csv
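
For reference, the resulting network.csv is a tab-separated file whose first two lines give the experiment start and end dates and the component codes, followed by the station table. A sketch of its first lines, based on the values shown above (exact float formatting may differ):

2012-07-26  2012-07-26
N  E  Z
station_id  network_code  station_code  longitude  latitude  elevation_m
YH.DD06  YH  DD06  30.31777  40.623539  182.0
YH.SAUV  YH  SAUV  30.3272  40.7402  170.0
...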

Test reading the new network file with BPMF.dataset.Network

The CSV file with the network metadata is meant to be read into an instance of BPMF.dataset.Network.

[9]:
net = Network(NETWORK_FILENAME)
net.read()
[10]:
# list of station codes
net.stations
[10]:
stations
DD06    DD06
SAUV    SAUV
DC06    DC06
DC08    DC08
SPNC    SPNC
DE08    DE08
DC07    DC07
DE07    DE07
Name: stations, dtype: object
[11]:
# inter-station distance in km
net.interstation_distances
[11]:
stations DD06 SAUV DC06 DC08 SPNC DE08 DC07 DE07
stations
DD06 0.000000 12.978986 4.507919 14.591825 6.982891 15.774078 8.015674 10.079368
SAUV 12.979910 0.000000 14.676627 6.525864 6.228594 6.748812 10.842688 9.790711
DC06 4.424402 14.650371 0.000000 14.230564 8.472197 18.850665 5.904608 14.155945
DC08 14.593195 6.527089 14.258154 0.000000 8.142368 13.206857 8.616882 15.415829
SPNC 6.981746 6.225382 8.515170 8.138928 0.000000 10.812369 5.970570 8.747317
DE08 15.783648 6.769376 18.878441 13.216772 10.827065 0.000000 16.576421 7.663916
DC07 8.017919 10.843241 5.970460 8.616650 5.974924 16.568395 0.000000 14.383684
DE07 10.093446 9.803980 14.192278 15.423741 8.764449 7.662741 14.392302 0.000000
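
As a sanity check (not part of the tutorial workflow), any entry of this matrix can be recomputed directly from the station coordinates with ObsPy's geodetics utilities; for the DD06-SAUV pair, the result should be close to the roughly 13 km listed above, with small differences possible depending on the geodetic approximation:

from obspy.geodetics import gps2dist_azimuth

# recompute the DD06-SAUV distance from the coordinates gathered above
lon1, lat1 = network_metadata.loc["YH.DD06", ["longitude", "latitude"]]
lon2, lat2 = network_metadata.loc["YH.SAUV", ["longitude", "latitude"]]
dist_m, _, _ = gps2dist_azimuth(lat1, lon1, lat2, lon2)
print(f"DD06-SAUV distance: {dist_m / 1000.0:.2f} km")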
[12]:
%config InlineBackend.figure_formats = ["svg"]

# plot a simple map with the station locations
fig = net.plot_map()
[Output figure: map of the station locations]