Project Initialization

In this first notebook, we initialize the project by creating the directory tree where inputs and outputs will be stored. Path variables are environment variables that are defined within the BPMF_parameters.cfg file, which is written to your working directory at the end of this notebook. We also define a number of other environment variables that will be used at later stages of the BPMF workflow.

[1]:
import os

import pandas as pd
[2]:
# Empty mapping that the following cells fill with parameter name -> value pairs.
config = dict()

Define the path variables

NB: path variables can either be relative or absolute paths

[3]:
# Register the four stand-alone directories in one call; the dict literal keeps
# the same insertion order as setting the keys one at a time.
config.update(
    {
        # INPUT_PATH: root directory holding all of the input data
        "INPUT_PATH": "../BPMF_data",
        # OUTPUT_PATH: root directory holding every output produced by BPMF
        "OUTPUT_PATH": "../BPMF_outputs",
        # NETWORK_PATH: directory holding the network metadata file
        "NETWORK_PATH": "../network",
        # MOVEOUTS_PATH: directory holding the travel times
        "MOVEOUTS_PATH": "../moveouts",
    }
)

# The NLLoc directories are nested inside the input/output roots, so they are
# derived only after those roots have been registered.
# NLLOC_INPUT_PATH: directory holding the NLLoc input files
nlloc_input_dir = os.path.join(config["INPUT_PATH"], "NLLoc_inputs")
config["NLLOC_INPUT_PATH"] = nlloc_input_dir
# NLLOC_OUTPUT_PATH: directory holding the NLLoc output files
nlloc_output_dir = os.path.join(config["OUTPUT_PATH"], "NLLoc_outputs")
config["NLLOC_OUTPUT_PATH"] = nlloc_output_dir

Create folder tree

[4]:
# Create every project directory (including missing parents).
# `exist_ok=True` makes the call a no-op when the directory is already there,
# which removes the separate isdir() check and the window between that check
# and the creation. A FileExistsError is still raised if the path exists but is
# not a directory.
for var in [
    "INPUT_PATH",
    "OUTPUT_PATH",
    "NETWORK_PATH",
    "MOVEOUTS_PATH",
    "NLLOC_INPUT_PATH",
    "NLLOC_OUTPUT_PATH",
]:
    os.makedirs(config[var], exist_ok=True)

Define the preprocessing parameters

[5]:
# Preprocessing settings, registered in a single call.
config.update(
    {
        # SAMPLING_RATE_HZ: sampling rate targeted when resampling the data
        "SAMPLING_RATE_HZ": 25.0,
        # MIN_FREQ_HZ: lower corner, in Hertz, of the bandpass filter
        "MIN_FREQ_HZ": 2.0,
        # MAX_FREQ_HZ: upper corner, in Hertz, of the bandpass filter
        "MAX_FREQ_HZ": 12.0,
        # DATA_BUFFER_SEC: length, in seconds, of the data buffer kept at the
        #                  start and end of each day
        "DATA_BUFFER_SEC": 500.0,
    }
)

Define the backprojection parameters

[6]:
config.update(
    {
        # N_DEV_BP_THRESHOLD: backprojection detection threshold, expressed as a
        #                     number of deviations (e.g. rms or mad) above the
        #                     central tendency (e.g. mean or median)
        #                     (not used in this tutorial)
        "N_DEV_BP_THRESHOLD": 15.0,
    }
)

Define the template matching parameters

[7]:
# Template matching settings, registered in a single call.
config.update(
    {
        # TEMPLATE_LEN_SEC: length of a template, in seconds
        "TEMPLATE_LEN_SEC": 8.0,
        # MATCHED_FILTER_STEP_SAMP: number of samples separating two consecutive
        #                           correlation coefficient measurements
        "MATCHED_FILTER_STEP_SAMP": 1,
        # N_DEV_MF_THRESHOLD: matched filter detection threshold, expressed as a
        #                     number of deviations (e.g. rms or mad) above the
        #                     central tendency (e.g. mean or median)
        "N_DEV_MF_THRESHOLD": 8.0,
    }
)

Miscellaneous

[8]:
# Remaining settings, registered in a single call.
config.update(
    {
        # NLLOC_BASENAME: basename shared by the NLLoc files
        "NLLOC_BASENAME": "NAF",
        # BUFFER_EXTRACTED_EVENTS_SEC: duration taken before the origin time when
        #                              reading an event's data
        "BUFFER_EXTRACTED_EVENTS_SEC": 20.0,
    }
)

Write the BPMF_parameters.cfg file in the current working directory

[9]:
# Convert the config dictionary to a pandas.DataFrame for pretty printing.
# The table is built in a single constructor call instead of being enlarged one
# row at a time through `.loc`. `dtype=object` keeps each value exactly as
# stored in `config` (strings, floats and ints side by side) rather than letting
# pandas coerce the column to a common type.
config_pd = pd.DataFrame(
    {"parameter_value": list(config.values())},
    index=pd.Index(list(config.keys()), name="parameter_name"),
    dtype=object,
)
[10]:
# Bare last expression: the notebook renders the parameter table as this cell's output.
config_pd
[10]:
parameter_value
parameter_name
INPUT_PATH ../BPMF_data
OUTPUT_PATH ../BPMF_outputs
NETWORK_PATH ../network
MOVEOUTS_PATH ../moveouts
NLLOC_INPUT_PATH ../BPMF_data/NLLoc_inputs
NLLOC_OUTPUT_PATH ../BPMF_outputs/NLLoc_outputs
SAMPLING_RATE_HZ 25.0
MIN_FREQ_HZ 2.0
MAX_FREQ_HZ 12.0
DATA_BUFFER_SEC 500.0
N_DEV_BP_THRESHOLD 15.0
TEMPLATE_LEN_SEC 8.0
MATCHED_FILTER_STEP_SAMP 1
N_DEV_MF_THRESHOLD 8.0
NLLOC_BASENAME NAF
BUFFER_EXTRACTED_EVENTS_SEC 20.0
[11]:
# Write the configuration to BPMF_parameters.cfg in the current working
# directory, one "NAME = value" line per parameter, in insertion order.
with open("BPMF_parameters.cfg", "w") as fparams:
    fparams.writelines(f"{name} = {value}\n" for name, value in config.items())