Lighthouse Labs

W4D4 - APIs and Other Data Types

Instructor: Socorro E. Dominguez-Vidana
LHL


Overview:

  • Introduction to APIs
    • Using APIs as a data professional
    • Real-Life Examples
    • HTTP Requests
    • Sending requests from different environments
    • Best practices for working with APIs
  • Other Data Types
  • Why is JSON so popular?
  • JSON vs XML?
  • Combining Python and Excel

Where Does Our Data Come From?¶

  • Everyday Data: We interact with data constantly, whether we are checking our bank account, browsing social media, or searching for a restaurant.
  • Sources of Data: Data can come from many different places, and as a data scientist, understanding these sources is key to solving real-world problems.
  • Why It Matters: How we collect data influences what questions we can ask and what insights we can generate.

Meet Jason

Name: Jason

Hobbies: Hiking, seismology, coding.

Goal: Use data to predict earthquakes.

[Clip art: geologist]
Creazilla. (n.d.). Safari guide [Clip art].
How can Jason gather the data?¶
  • Public Datasets: Governments and research institutions often provide open data on seismic activity.
  • Seismic Networks: Organizations like the Global Seismographic Network (GSN) and local observatories gather seismic data from around the world.
  • Historical Records: Libraries, academic institutions, and online archives host datasets on past earthquakes.
  • Crowdsourced Data: Platforms like "Did You Feel It?" allow the public to report real-time observations of seismic activity.
  • Private Companies: Some tech and insurance companies also collect data for risk analysis and disaster response.
Challenges:¶
  • Data Fragmentation: Data may have inconsistent formats and varying levels of detail.
  • Manual Downloads: Collecting files by hand is slow and time-consuming.
  • If Jason wanted to mine data from the web (e.g., private companies' information):
    • Almost all information on a page is irrelevant.
    • Websites often require interaction ("Load More" buttons, infinite scrolling).
    • When a website updates, the scraping code breaks.
    • Every website is different.
    • Companies actively try to block scrapers, and scraping may violate terms of service or even be illegal.

APIs in Data Science¶

  • Application Programming Interfaces (APIs) allow data scientists to connect to databases, retrieve data, and automate analysis.
  • APIs allow applications to communicate with each other, serving as bridges between different data sources.
  • Many fields provide open data APIs for accessing information, including weather, finance, and geological data.
  • APIs simplify data collection without manually downloading files.

Why use APIs?¶

  • Real-Time Data Access: APIs like the USGS Earthquake API provide up-to-the-minute seismic data, giving Jason instant access to recent earthquake activity.
  • Consistency in Data Format: APIs often return data in standardized formats (e.g., JSON, CSV, XML), making analysis much simpler.
  • Data Customization: APIs allow Jason to filter the data by criteria like location, time range, or earthquake magnitude, ensuring he gets only the data relevant to his analysis.
  • Automation Capabilities: Instead of manually checking data repositories, Jason can automate the process to continuously gather new data with scripts.

Representational State Transfer (REST)

  • Stateless: Each API call is independent, with all necessary information included in the request.
  • Resource-Based: Interactions revolve around resources, identified by URIs.
  • Use of HTTP (Hypertext Transfer Protocol) Methods:
    • GET: retrieve (the only method you will use for now)
    • POST: create
    • PUT: update
    • DELETE: remove
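
As a quick sketch of how these methods look with Python's requests library (the endpoint and payload below are made up for illustration):

import requests

base = "https://api.example.com/earthquakes"  # hypothetical endpoint

requests.get(base, params={"minmagnitude": 5})        # GET: retrieve resources
requests.post(base, json={"magnitude": 4.5})          # POST: create a resource
requests.put(base + "/123", json={"magnitude": 4.6})  # PUT: update resource 123
requests.delete(base + "/123")                        # DELETE: remove resource 123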

Accessing Seismic Data via the USGS Earthquake API¶

Jason's new BFF: Documentation

In [1]:
import requests
In [2]:
url = "https://earthquake.usgs.gov/fdsnws/event/1/query"
In [3]:
params = {
    'format': 'geojson',
    'starttime': '1923-01-01',
    'endtime': '2024-10-02',
    'latitude': 36.77,
    'longitude': -119.41,
    'maxradius': 50,       # in degrees ('maxradiuskm' takes kilometers instead)
    'minmagnitude': 5,
    'limit': 2000
}
In [4]:
response = requests.get(url, params=params)
response
Out[4]:
<Response [200]>
In [5]:
data = response.json()
In [6]:
response.url
Out[6]:
'https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=1923-01-01&endtime=2024-10-02&latitude=36.77&longitude=-119.41&maxradius=50&minmagnitude=5&limit=2000'
In [7]:
#data  # uncomment to inspect the raw GeoJSON response

Jason can extract relevant data, such as magnitude and location, for analysis using other Python libraries such as pandas and matplotlib.

In [8]:
import pandas as pd
import matplotlib.pyplot as plt
In [9]:
features = data['features']
df = pd.json_normalize(features)
df.head(3)
Out[9]:
type id properties.mag properties.place properties.time properties.updated properties.tz properties.url properties.detail properties.felt ... properties.types properties.nst properties.dmin properties.rms properties.gap properties.magType properties.type properties.title geometry.type geometry.coordinates
0 Feature us7000ngxr 5.1 207 km WSW of Pistol River, Oregon 1727492655149 1728530066400 NaN https://earthquake.usgs.gov/earthquakes/eventp... https://earthquake.usgs.gov/fdsnws/event/1/que... 12.0 ... ,dyfi,internal-moment-tensor,moment-tensor,nea... 69.0 1.910 1.35 213.0 mww earthquake M 5.1 - 207 km WSW of Pistol River, Oregon Point [-126.8404, 41.8543, 10]
1 Feature us7000ngr0 5.1 84 km SW of Corinto, Nicaragua 1727408371854 1727444129040 NaN https://earthquake.usgs.gov/earthquakes/eventp... https://earthquake.usgs.gov/fdsnws/event/1/que... NaN ... ,internal-moment-tensor,moment-tensor,origin,p... 70.0 1.021 0.93 135.0 mww earthquake M 5.1 - 84 km SW of Corinto, Nicaragua Point [-87.6523, 11.8785, 51.404]
2 Feature us7000ngme 5.0 4 km NW of San Miguel Totolapan, Mexico 1727377382951 1727447850638 NaN https://earthquake.usgs.gov/earthquakes/eventp... https://earthquake.usgs.gov/fdsnws/event/1/que... 12.0 ... ,dyfi,origin,phase-data, 134.0 1.636 0.97 158.0 mb earthquake M 5.0 - 4 km NW of San Miguel Totolapan, Mexico Point [-100.4196, 18.1862, 70.492]

3 rows × 30 columns

In [10]:
df['magnitude'] = df['properties.mag']
df['place'] = df['properties.place']
df['time'] = pd.to_datetime(df['properties.time'], unit='ms')
In [11]:
plt.figure(figsize=(12, 6))
plt.scatter(df['time'], df['magnitude'], alpha=0.5)
plt.title('Magnitude 5+ Earthquakes within 50 Degrees of California')
plt.xlabel('Date')
plt.ylabel('Magnitude')
plt.xticks(rotation=45)
plt.grid()
plt.show()
[Figure: scatter plot of earthquake magnitude over time]

Where is the USGS data really stored?¶

  • The USGS API provides access to a (possibly SQL) database of earthquake events collected and maintained by the USGS.
  • The data originates from various seismic networks and is updated in near real time.
  • The database itself is not publicly accessible: you cannot download it and query it directly.
  • Each API call likely triggers a query on the server that returns the requested information to you.

The API as a Server¶

[Image: API server diagram]
Monosolutions. (n.d.). Image of API server.

[Image: REST request/response diagram]
SQL Shack. (2021). Representational state transfer diagram.

Disclaimer: Can Data Science Predict Earthquakes?¶

  • Short answer: not yet, and domain expertise is crucial.
  • Earthquakes are caused by tectonic movements, but predicting the exact time and location remains a challenge.
  • Jason wants to use data science tools like machine learning to find patterns in seismic data.
  • Jason will need to work closely with geologists and seismologists to ensure his models are scientifically accurate.

From Terminal, a Browser, and Postman¶

!curl -G "https://earthquake.usgs.gov/fdsnws/event/1/query" \
--data-urlencode "format=geojson" \
--data-urlencode "starttime=1924-10-01" \
--data-urlencode "endtime=2024-10-01" \
--data-urlencode "latitude=36.7783" \
--data-urlencode "longitude=-119.4179" \
--data-urlencode "maxradius=50" \
--data-urlencode "minmagnitude=0.0" \
--data-urlencode "limit=1"

The same query as a single URL, ready to paste into a browser or Postman:

https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=1924-10-01&endtime=2024-10-01&latitude=36.7783&longitude=-119.4179&maxradius=50&minmagnitude=0.0&limit=1

Working with Paid APIs and Authentication Keys¶

  • The API will require an authentication key.
  • Register on the API provider’s website to obtain access.
  • After account creation, navigate to the API settings or dashboard.
  • Locate the section for generating API keys or tokens.
  • Follow the instructions to create a new key.
  • Read the API Documentation
import os
import requests

url = "https://api.fakeusgs.com/v1/seismic"

# Keep your API key secure by reading it from an environment variable
api_key = os.getenv("SEISMIC_API_KEY")

# Query parameters
params = {
    "latitude": 10,
    "longitude": 80,
    "limit": 100}

# Headers carrying the authentication token
headers = {
    "Authorization": f"Bearer {api_key}",
    "Accept": "application/json"}

response = requests.get(url, headers=headers, params=params)

Appendix I: Common HTTP Status Codes¶

  • 200 OK: Success.
  • 204 No Content: The request succeeded, but there is no content to return.
  • 400 Bad Request: The request had invalid syntax.
  • 401 Unauthorized: Authentication is required and has failed or not been provided.
  • 403 Forbidden: The server understood the request but refuses to authorize it.
  • 404 Not Found: The resource was not found on the server.
  • 500 Internal Server Error: The server could not fulfill the request.
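
A defensive pattern is to check the status code before parsing the body. A minimal sketch using the USGS endpoint from earlier:

import requests

response = requests.get(
    "https://earthquake.usgs.gov/fdsnws/event/1/query",
    params={"format": "geojson", "limit": 1}
)

if response.status_code == 200:
    data = response.json()          # safe to parse
else:
    print("Request failed:", response.status_code)

# Alternatively, raise an HTTPError automatically for any 4xx/5xx response:
response.raise_for_status()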

Other Data Formats¶

Data format (example) and what we are trying to use it for:

  • Text (tweets, scripts, books): sentiment analysis and other NLP.
  • JSON or XML (parsing APIs): data gathering and ingestion, trend analysis, forecasting.
  • HTML (web scraping): getting information where APIs are not available.
  • Images (computer vision): self-driving cars, building custom shoes, X-ray diagnostics.

Different data, different tools¶

  • Tabular data: pandas, SQL
  • XML: xml
  • JSON: json
  • HTML: BeautifulSoup

JSON vs XML¶

  • Syntax: JSON uses braces {} and brackets []; XML uses nested tags <>.
  • Verbosity: JSON is less verbose; XML is more complex.
  • Data types: JSON supports arrays and objects natively; XML requires additional markup for lists.
  • Parsing: JSON is easier to parse with built-in functions; XML requires a dedicated parser.
  • Human readability: JSON is easier to read and write; XML's structure can be harder to read.
  • Use cases: JSON for APIs and configuration; XML for document storage and data interchange.
XML:

<earthquakes>
  <earthquake>
    <magnitude>4.5</magnitude>
    <location>10 km N of Turlock, CA</location>
    <date>2021-09-01</date>
  </earthquake>
</earthquakes>

JSON:

{
  "earthquakes": {
    "earthquake": {
      "magnitude": 4.5,
      "location": "10 km N of Turlock, CA",
      "date": "2021-09-01"
    }
  }
}
Tabular:

Magnitude  Location                 Date
4.5        10 km N of Turlock, CA   2021-09-01
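
All three representations carry the same record. A minimal sketch showing that the XML and JSON versions parse to the same values with Python's built-in json and xml modules (snippets copied from above):

import json
import xml.etree.ElementTree as et

xml_snippet = """<earthquakes>
  <earthquake>
    <magnitude>4.5</magnitude>
    <location>10 km N of Turlock, CA</location>
    <date>2021-09-01</date>
  </earthquake>
</earthquakes>"""

json_snippet = """{"earthquakes": {"earthquake":
  {"magnitude": 4.5, "location": "10 km N of Turlock, CA", "date": "2021-09-01"}}}"""

root = et.fromstring(xml_snippet)
print(root.find('earthquake/magnitude').text)            # '4.5' (a string; needs casting)

parsed = json.loads(json_snippet)
print(parsed['earthquakes']['earthquake']['magnitude'])  # 4.5 (already a number)

Note that JSON preserves numeric types, while XML stores everything as text.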
Why Save Data After Downloading from an API¶
  • Data Integrity: Preserve a stable copy in case of API changes or outages.
  • Performance: Faster local access improves application efficiency.
  • Flexibility: Enables easier manipulation and analysis without API constraints.
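
For instance, Jason could persist the GeoJSON response from the earlier query and reload it later without hitting the API again (a minimal sketch; the file name is arbitrary):

import json

# Save the parsed API response to disk
with open('data/usgs_response.json', 'w') as f:
    json.dump(data, f)

# Later: reload the local copy instead of calling the API again
with open('data/usgs_response.json') as f:
    data = json.load(f)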

XML and BeautifulSoup¶

  • Explore XML files with the built-in xml module or the BeautifulSoup package.
  • Data files provided: toy_data.xml and usgs_data.xml
In [12]:
import xml.etree.ElementTree as et
In [13]:
tree = et.parse('data/toy_data.xml')
tree
Out[13]:
<xml.etree.ElementTree.ElementTree at 0x10f64e790>
In [14]:
# Gets the tree root tag
root = tree.getroot()
root
Out[14]:
<Element 'seismic_data' at 0x1171798f0>
In [15]:
root.tag
Out[15]:
'seismic_data'

From the root, we can begin to navigate the tree

In [16]:
# get root tag
print("What is the root tag:", root.tag)

# get root attributes
print("Attributes of the root tag:", root.attrib)

# get number of "children"
print("Number of children:", len(root))
What is the root tag: seismic_data
Attributes of the root tag: {}
Number of children: 3
In [17]:
for idx in range(len(root)):
    print("tag:", root[idx].tag, "| attribute:", root[idx].attrib)
tag: earthquake | attribute: {'id': '1', 'type': 'moderate'}
tag: earthquake | attribute: {'id': '2', 'type': 'light'}
tag: earthquake | attribute: {'id': '3', 'type': 'strong'}
In [18]:
x = root[0].attrib
In [19]:
x['type']
Out[19]:
'moderate'
In [20]:
import xmltodict
In [21]:
with open('data/toy_data.xml', 'r') as file:
    xml_data = file.read()
In [22]:
xml_data
Out[22]:
'<?xml version="1.0"?>\n<seismic_data>\n   <earthquake id="1" type="moderate">\n       <magnitude>4.5</magnitude>\n       <location>10 km N of Turlock, CA</location>\n       <date>2021-09-01</date>\n       <depth>10</depth> <!-- in kilometers -->\n       <coordinates>\n           <latitude>36.7783</latitude>\n           <longitude>-119.4179</longitude>\n       </coordinates>\n   </earthquake>\n   <earthquake id="2" type="light">\n       <magnitude>5.1</magnitude>\n       <location>20 km S of San Jose, CA</location>\n       <date>2022-06-15</date>\n       <depth>15</depth> <!-- in kilometers -->\n       <coordinates>\n           <latitude>37.3349</latitude>\n           <longitude>-121.8890</longitude>\n       </coordinates>\n   </earthquake>\n   <earthquake id="3" type="strong">\n       <magnitude>6.0</magnitude>\n       <location>30 km SW of Los Angeles, CA</location>\n       <date>2023-01-10</date>\n       <depth>20</depth> <!-- in kilometers -->\n       <coordinates>\n           <latitude>33.9490</latitude>\n           <longitude>-118.4040</longitude>\n       </coordinates>\n   </earthquake>\n</seismic_data>\n'
In [23]:
data_dict = xmltodict.parse(xml_data)
data_dict.keys()
Out[23]:
dict_keys(['seismic_data'])
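The parsed document is now nested dictionaries and lists. A small sketch of drilling down (key names follow the toy file above; xmltodict turns repeated tags into a list and prefixes attributes with '@'):

quakes = data_dict['seismic_data']['earthquake']  # list of three earthquake dicts
quakes[0]['@type']       # 'moderate' (an XML attribute)
quakes[0]['magnitude']   # '4.5' (xmltodict returns strings)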
In [24]:
from bs4 import BeautifulSoup
In [25]:
soup = BeautifulSoup(xml_data, 'lxml-xml')
In [26]:
soup.find_all('earthquake')[1]
Out[26]:
<earthquake id="2" type="light">
<magnitude>5.1</magnitude>
<location>20 km S of San Jose, CA</location>
<date>2022-06-15</date>
<depth>15</depth> <!-- in kilometers -->
<coordinates>
<latitude>37.3349</latitude>
<longitude>-121.8890</longitude>
</coordinates>
</earthquake>
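
BeautifulSoup also makes it easy to collect tag text into plain Python structures; a small sketch building on the soup object above:

# Extract every <magnitude> value as a float
magnitudes = [float(tag.text) for tag in soup.find_all('magnitude')]
magnitudes   # [4.5, 5.1, 6.0]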

JSON¶

  • Explore JSON files with the json and pandas libraries.
  • Data files provided: toy_data.json and usgs_data.json
In [27]:
import pandas as pd

file = 'data/toy_data.json'
json_data = pd.read_json(file)
json_data
Out[27]:
seismic_data
0 {'region': 'California', 'earthquake_data': [{...
1 {'region': 'Nevada', 'earthquake_data': [{'ear...
2 {'earthquake_data': [{'earthquakes': [{'magnit...
  • pd.read_json doesn't always work with nested JSON files...
  • You may encounter a ValueError as it tries to build a pd.DataFrame
In [28]:
from IPython.display import JSON
import pprint
import json
In [29]:
with open('data/toy_data.json') as file:
    data = json.load(file)
In [30]:
#pprint.pprint(data)
In [31]:
# Interactive JSON viewer (JupyterLab only)
JSON(data)
Out[31]:
<IPython.core.display.JSON object>
In [32]:
pd.json_normalize(data)
Out[32]:
seismic_data
0 [{'region': 'California', 'earthquake_data': [...
In [33]:
data.keys()
Out[33]:
dict_keys(['seismic_data'])
In [34]:
df = pd.json_normalize(data['seismic_data'], 
                       record_path=['earthquake_data', 'earthquakes'], 
                       meta=['region'],  # Meta data at the higher level
                       meta_prefix='_',  # Prefix for clarity
                       errors='ignore') # Explore ignore/raise
df
Out[34]:
magnitude location date _region
0 4.5 10 km N of Turlock, CA 2021-09-01 California
1 3.8 15 km N of Los Angeles, CA 2021-10-01 California
2 4.2 5 km S of Reno, NV 2021-11-01 Nevada
3 3.9 20 km W of Las Vegas, NV 2021-12-01 Nevada
4 4.1 30 km N of Fresno, CA 2021-11-15 NaN
Other approaches¶
In [35]:
flattened_data = []

for region_info in data['seismic_data']:
    region = region_info.get('region', 'Unknown')
    for earthquake_data in region_info.get('earthquake_data', []):
        for earthquake in earthquake_data.get('earthquakes', []):
            flattened_entry = {
                'magnitude': earthquake['magnitude'],
                'location': earthquake['location'],
                'date': earthquake['date'],
                'region': region
            }
            flattened_data.append(flattened_entry)
pd.DataFrame(flattened_data)
Out[35]:
magnitude location date region
0 4.5 10 km N of Turlock, CA 2021-09-01 California
1 3.8 15 km N of Los Angeles, CA 2021-10-01 California
2 4.2 5 km S of Reno, NV 2021-11-01 Nevada
3 3.9 20 km W of Las Vegas, NV 2021-12-01 Nevada
4 4.1 30 km N of Fresno, CA 2021-11-15 Unknown

Other forms of tabular data¶

In [36]:
import pandas as pd
import openpyxl
In [37]:
data_xlsx = pd.read_excel('data/toy_tabular.xlsx')
data_xlsx
Out[37]:
Table 1 Unnamed: 1
0 NaN NaN
1 Owner of Data: Geo Jason

Excel Files¶

  • As a data analyst or data scientist, you need to know both Python and Excel.
  • It is not a competition.
  • Excel files can be very practical and widely understood/used.
[Screenshots: the two tabs of the Excel workbook]
  • After exploring the file, we know that the data is on the second tab, named 'Data'
In [38]:
data_xlsx = pd.read_excel('data/toy_tabular.xlsx', 
                          sheet_name='Data',   # the tab that holds the data
                          header=1,            # column names are on the second row
                          usecols='B:E',       # the table occupies columns B-E
                          engine='openpyxl')
data_xlsx
Out[38]:
magnitude location date _region
0 4.5 13 km N of Turlock, CA 1996-09-01 California
1 3.8 12 km N of Los Angeles, CA 1996-10-01 California
2 4.2 3 km S of Reno, NV 1996-11-01 Nevada
3 3.9 15 km W of Las Vegas, NV 1997-12-01 Nevada
4 4.1 3 km N of Fresno, CA 1997-11-15 California
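
Going the other way, a cleaned table can be written back to Excel for colleagues who work there; a minimal sketch using DataFrame.to_excel (the output file name is arbitrary):

# Write the cleaned table to a new workbook (uses openpyxl under the hood)
data_xlsx.to_excel('data/toy_tabular_clean.xlsx',
                   sheet_name='Clean Data',
                   index=False)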

Useful Links¶

JSON Plug-in

Gulati, A. (2021, April 20). All Pandas json_normalize you should know for flattening JSON. Towards Data Science. https://towardsdatascience.com/all-pandas-json-normalize-you-should-know-for-flattening-json-13eae1dfb7dd