{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Make Network File\n", "\n", "In this notebook, we compile the seismic network metadata that will be used repeatedly throughout the workflow. We also scan the whole preprocessed data set to measure the daily data availability, which is essential for estimating the detection capability of the network at a given time." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import fnmatch\n", "import glob\n", "import pandas as pd\n", "import os\n", "import sys\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import obspy as obs\n", "\n", "from BPMF.config import cfg\n", "from BPMF.dataset import Network\n", "from matplotlib.ticker import FixedLocator, FormatStrFormatter" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "NETWORK_FILENAME = \"network.csv\"\n", "AVAILABILITY_FILENAME = \"availability.csv\"\n", "preproc_folder_name = f\"preprocessed_{cfg.MIN_FREQ_HZ:.0f}_{cfg.MAX_FREQ_HZ:.0f}\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatetimeIndex(['2012-07-26'], dtype='datetime64[ns]', freq='D')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# we check the station metadata and data availability between START_DATE and END_DATE\n", "# these also define the start and end of the experiment\n", "START_DATE = \"2012-07-26\"\n", "END_DATE = \"2012-07-26\"\n", "datelist = pd.date_range(start=START_DATE, end=END_DATE)\n", "datelist" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# initialize data frames\n", "daily_availability = pd.DataFrame()\n", "network_metadata = pd.DataFrame(\n", " columns=[\"network_code\", \"station_code\", \"longitude\", \"latitude\", \"elevation_m\"]\n", ")" ] }, { "cell_type": "code", "execution_count": 
5, "metadata": {}, "outputs": [], "source": [
 "# even though this tutorial analyzes a single day, this notebook is written for an extended\n",
 "# data base following the folder tree convention used here:\n",
 "#   INPUT_PATH/YYYY/YYYYMMDD/preproc_folder_name/*mseed  (one file per channel)\n",
 "#   INPUT_PATH/YYYY/YYYYMMDD/resp/NET.STA.xml             (station metadata)\n",
 "metadata_columns = [\"network_code\", \"station_code\", \"longitude\", \"latitude\", \"elevation_m\"]\n",
 "for date in datelist:\n",
 "    row_name = date.strftime(\"%Y-%m-%d\")\n",
 "    day_folder = os.path.join(cfg.INPUT_PATH, str(date.year), date.strftime(\"%Y%m%d\"))\n",
 "    data_folder = os.path.join(day_folder, preproc_folder_name)\n",
 "    resp_folder = os.path.join(day_folder, \"resp\")\n",
 "    data_filenames = glob.glob(os.path.join(data_folder, \"*mseed\"))\n",
 "    daily_network_metadata = pd.DataFrame(columns=metadata_columns)\n",
 "    # number of channel files found on this day, keyed by station id (NET.STA)\n",
 "    channel_counts = {}\n",
 "    for fname in data_filenames:\n",
 "        # we are only interested in the filename, not the entire path; its first two\n",
 "        # dot-separated fields are the network code and the station code\n",
 "        net_code, sta_code = os.path.basename(fname).split(\".\")[:2]\n",
 "        sta_id = f\"{net_code}.{sta_code}\"\n",
 "        # counting on the parsed id (rather than wildcard-matching the full path)\n",
 "        # guarantees that a file is only ever attributed to its own station\n",
 "        channel_counts[sta_id] = channel_counts.get(sta_id, 0) + 1\n",
 "        daily_network_metadata.loc[sta_id, [\"network_code\", \"station_code\"]] = [\n",
 "            net_code,\n",
 "            sta_code,\n",
 "        ]\n",
 "\n",
 "    for sta_id, num_channels in channel_counts.items():\n",
 "        daily_availability.loc[row_name, sta_id] = num_channels\n",
 "        if sta_id not in network_metadata.index:\n",
 "            # first time this station is seen: read its coordinates from the\n",
 "            # first station of the first network stored in its response file\n",
 "            station_inv = obs.read_inventory(\n",
 "                os.path.join(resp_folder, f\"{sta_id}.xml\")\n",
 "            )[0][0]\n",
 "            daily_network_metadata.loc[\n",
 "                sta_id, [\"longitude\", \"latitude\", \"elevation_m\"]\n",
 "            ] = [station_inv.longitude, station_inv.latitude, station_inv.elevation]\n",
 "    # append only the stations that are not yet in `network_metadata`: the rows of\n",
 "    # already-known stations carry no coordinates here, and `drop_duplicates` (which\n",
 "    # compares column values, not index labels) would keep them as duplicated\n",
 "    # station ids with NaN coordinates\n",
 "    is_new_station = ~daily_network_metadata.index.isin(network_metadata.index)\n",
 "    if is_new_station.any():\n",
 "        network_metadata = pd.concat(\n",
 "            [network_metadata, daily_network_metadata.loc[is_new_station]]\n",
 "        )\n"
 ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, 
"outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
network_codestation_codelongitudelatitudeelevation_m
YH.DD06YHDD0630.3177740.623539182.0
YH.DE08YHDE0830.40646940.74856231.0
YH.SPNCYHSPNC30.308340.686001190.0
YH.DC08YHDC0830.2501340.744438162.0
YH.DC06YHDC0630.26575140.616718555.0
YH.SAUVYHSAUV30.327240.7402170.0
YH.DE07YHDE0730.41153940.67966140.0
YH.DC07YHDC0730.2421740.66708164.0
\n", "
" ], "text/plain": [ " network_code station_code longitude latitude elevation_m\n", "YH.DD06 YH DD06 30.31777 40.623539 182.0\n", "YH.DE08 YH DE08 30.406469 40.748562 31.0\n", "YH.SPNC YH SPNC 30.3083 40.686001 190.0\n", "YH.DC08 YH DC08 30.25013 40.744438 162.0\n", "YH.DC06 YH DC06 30.265751 40.616718 555.0\n", "YH.SAUV YH SAUV 30.3272 40.7402 170.0\n", "YH.DE07 YH DE07 30.411539 40.679661 40.0\n", "YH.DC07 YH DC07 30.24217 40.66708 164.0" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "network_metadata" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
YH.DD06YH.DE08YH.SPNCYH.DC08YH.DC06YH.SAUVYH.DE07YH.DC07
2012-07-263.03.03.03.03.03.03.03.0
\n", "
" ], "text/plain": [ " YH.DD06 YH.DE08 YH.SPNC YH.DC08 YH.DC06 YH.SAUV YH.DE07 \\\n", "2012-07-26 3.0 3.0 3.0 3.0 3.0 3.0 3.0 \n", "\n", " YH.DC07 \n", "2012-07-26 3.0 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "daily_availability" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Save the network metadata and data availability" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../network/network.csv\n" ] } ], "source": [ "network_metadata.index.name = \"station_id\"\n", "print(os.path.join(cfg.NETWORK_PATH, NETWORK_FILENAME))\n", "network_metadata.to_csv(os.path.join(cfg.NETWORK_PATH, NETWORK_FILENAME), sep=\"\\t\")\n", "# add two header lines\n", "with open(os.path.join(cfg.NETWORK_PATH, NETWORK_FILENAME), \"r+\") as fnet:\n", " content = fnet.read()\n", " # move pointer to beginning of file\n", " fnet.seek(0, 0)\n", " # append lines at the beginning\n", " fnet.write(f\"{START_DATE}\\t{END_DATE}\\n\")\n", " # write the name of the components used on each station\n", " # note: the list of components will be used to broadcast\n", " # network waveforms into a single numpy.ndarray, so even\n", " # if some stations only have one component we need to \n", " # fill their missing components with zeros in order to\n", " # keep consistent data dimensions across stations\n", " fnet.write(f\"N\\tE\\tZ\\n\")\n", " fnet.write(content)\n", "daily_availability.to_csv(os.path.join(cfg.NETWORK_PATH, AVAILABILITY_FILENAME))" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## Test reading the new network file with `BPMF.dataset.Network`\n", "\n", "The csv file with network metadata is meant to be read into an instance of `BPMF.dataset.Network`." 
] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "net = Network(NETWORK_FILENAME)\n", "net.read()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['DD06', 'DE08', 'SPNC', 'DC08', 'DC06', 'SAUV', 'DE07', 'DC07'],\n", " dtype='\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DD06DE08SPNCDC08DC06SAUVDE07DC07
DD060.00000015.7795866.98232314.5925244.48190412.97945410.0874098.016817
DE0815.7795860.00000010.82088813.21246518.8718346.7605317.66333416.572942
SPNC6.98232310.8208880.0000008.1406968.5015496.2270208.7571725.972804
DC0814.59252413.2124658.1406960.00000014.2497866.52648215.4202688.616766
DC064.48190418.8718348.50154914.2497860.00000014.66855814.1834765.950485
SAUV12.9794546.7605316.2270206.52648214.6685580.0000009.79821010.842966
DE0710.0874097.6633348.75717215.42026814.1834769.7982100.00000014.388528
DC078.01681716.5729425.9728048.6167665.95048510.84296614.3885280.000000
\n", "" ], "text/plain": [ " DD06 DE08 SPNC DC08 DC06 SAUV \\\n", "DD06 0.000000 15.779586 6.982323 14.592524 4.481904 12.979454 \n", "DE08 15.779586 0.000000 10.820888 13.212465 18.871834 6.760531 \n", "SPNC 6.982323 10.820888 0.000000 8.140696 8.501549 6.227020 \n", "DC08 14.592524 13.212465 8.140696 0.000000 14.249786 6.526482 \n", "DC06 4.481904 18.871834 8.501549 14.249786 0.000000 14.668558 \n", "SAUV 12.979454 6.760531 6.227020 6.526482 14.668558 0.000000 \n", "DE07 10.087409 7.663334 8.757172 15.420268 14.183476 9.798210 \n", "DC07 8.016817 16.572942 5.972804 8.616766 5.950485 10.842966 \n", "\n", " DE07 DC07 \n", "DD06 10.087409 8.016817 \n", "DE08 7.663334 16.572942 \n", "SPNC 8.757172 5.972804 \n", "DC08 15.420268 8.616766 \n", "DC06 14.183476 5.950485 \n", "SAUV 9.798210 10.842966 \n", "DE07 0.000000 14.388528 \n", "DC07 14.388528 0.000000 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# inter-station distance in km\n", "net.interstation_distances" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", " \n", " 2026-03-12T16:33:12.318852\n", " image/svg+xml\n", " \n", " \n", " Matplotlib v3.10.5, https://matplotlib.org/\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n" ], "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%config InlineBackend.figure_formats = [\"svg\"]\n", "\n", "# plot a simple map with the station locations\n", "fig = net.plot_map()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10.4 ('hy7_py310')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.11" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "221f0e5b1b98151b07a79bf3b6d0c1d306576197d2c4531763770570a29e708e" } } }, "nbformat": 4, "nbformat_minor": 2 }