Skip to content

Recognize long and lon for longitude #51

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ For full dashboard functionality, upload a CSV or XLS file with the following co
- `Sex`: Sex of each sample.
- `hybrid_stat`: Hybrid status of each sample (e.g., 'valid_subspecies', 'subspecies_synonym', or 'unknown').
- `lat`*: Latitude at which image was taken or specimen was collected: number in [-90,90].
- `lon`*: Longitude at which image was taken or specimen was collected: number in [-180,180].
- `lon`*: Longitude at which image was taken or specimen was collected: number in [-180,180]. `long` will also be accepted.
- `file_url`*: URL to access file.

***Note:**
- Column names are **not** case-sensitive.
- `lat` and `lon` columns are not required to utilize the dashboard, but there will be no map view if they are not included. Blank (or null) entries are recorded as `unknown`, and thus excluded from map view.
- `Image_filename` and `file_url` are not required, but there will be no sample images option if either one is not included.
- `locality` may be provided; otherwise it takes the value `lat|lon`, or `unknown` if `lat` and `lon` are not provided.
Expand Down
8 changes: 4 additions & 4 deletions components/divs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
{'label': 'Subspecies', 'value': 'Subspecies'},
{'label':'View', 'value': 'View'},
{'label': 'Sex', 'value': 'Sex'},
{'label': 'Hybrid Status', 'value':'hybrid_stat'},
{'label': 'Locality', 'value': 'locality'}
{'label': 'Hybrid Status', 'value':'Hybrid_stat'},
{'label': 'Locality', 'value': 'Locality'}
]
DOCS_URL = "https://github.com/Imageomics/dashboard-prototype#how-it-works"
DOCS_LINK = html.A("documentation",
Expand Down Expand Up @@ -196,8 +196,8 @@ def get_img_div(df, all_species, img_url):
style = QUARTER_DIV_STYLE
),
html.Div([
dcc.Checklist(df.hybrid_stat.unique(),
df.hybrid_stat.unique()[0:2],
dcc.Checklist(df.Hybrid_stat.unique(),
df.Hybrid_stat.unique()[0:2],
id = 'hybrid?')],
style = QUARTER_DIV_STYLE
),
Expand Down
8 changes: 4 additions & 4 deletions components/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def make_map(df, color_by):
# only use entries that have valid lat & lon for mapping
df = df.loc[df['lat-lon'].str.contains('unknown') == False]
fig = px.scatter_mapbox(df,
lat = "lat",
lon = "lon",
lat = "Lat",
lon = "Lon",
#projection = "natural earth",
custom_data = ["Samples_at_locality", "Species_at_locality", "Subspecies_at_locality"],
size = "Samples_at_locality",
Expand All @@ -64,8 +64,8 @@ def make_map(df, color_by):
title = "Distribution of Samples")

fig.update_traces(hovertemplate =
"Latitude: %{lat}<br>"+
"Longitude: %{lon}<br>" +
"Latitude: %{Lat}<br>"+
"Longitude: %{Lon}<br>" +
"Samples at lat/lon: %{customdata[0]}<br>" +
"Species at lat/lon: %{customdata[1]}<br>" +
"Subspecies at lat/lon: %{customdata[2]}<br>"
Expand Down
22 changes: 11 additions & 11 deletions components/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def get_data(df, mapping, features):
df - DataFrame of the data to visualize.
mapping - Boolean. True when lat/lon are given in dataset.
features - List of features (columns) included in the DataFrame. This is a subset of the suggested columns:
'Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename'
'Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename'

Returns:
--------
Expand All @@ -29,24 +29,24 @@ def get_data(df, mapping, features):
# Will likely choose to calculate and return this in later instance
cat_list = [{'label': 'Species', 'value': 'Species'},
{'label': 'Subspecies', 'value': 'Subspecies'},
{'label':'View', 'value': 'View'},
{'label': 'View', 'value': 'View'},
{'label': 'Sex', 'value': 'Sex'},
{'label': 'Hybrid Status', 'value':'hybrid_stat'},
{'label': 'Locality', 'value': 'locality'}
{'label': 'Hybrid Status', 'value':'Hybrid_stat'},
{'label': 'Locality', 'value': 'Locality'}
]

df = df.copy()
df = df.fillna('unknown')
features.append('locality')
features.append('Locality')

# If we don't have lat/lon, just return DataFrame with otherwise required features.
if not mapping:
if 'locality' not in df.columns:
df['locality'] = 'unknown'
if 'Locality' not in df.columns:
df['Locality'] = 'unknown'
return df[features], cat_list

# else lat and lon are in dataset, so process locality information
df['lat-lon'] = df['lat'].astype(str) + '|' + df['lon'].astype(str)
df['lat-lon'] = df['Lat'].astype(str) + '|' + df['Lon'].astype(str)
df["Samples_at_locality"] = df['lat-lon'].map(df['lat-lon'].value_counts()) # will duplicate if multiple views of same sample

# Count and record number of species and subspecies at each lat-lon
Expand All @@ -56,8 +56,8 @@ def get_data(df, mapping, features):
df.loc[df['lat-lon'] == lat_lon, "Species_at_locality"] = ", ".join(species_list)
df.loc[df['lat-lon'] == lat_lon, "Subspecies_at_locality"] = ", ".join(subspecies_list)

if 'locality' not in df.columns:
df['locality'] = df['lat-lon'] # contains "unknown" if lat or lon null
if 'Locality' not in df.columns:
df['Locality'] = df['lat-lon'] # contains "unknown" if lat or lon null

new_features = ['lat-lon', "Samples_at_locality", "Species_at_locality", "Subspecies_at_locality"]
for feature in new_features:
Expand Down Expand Up @@ -157,7 +157,7 @@ def get_filenames(df, subspecies, view, sex, hybrid, num_images):
df_sub = df.loc[df.Subspecies.isin(subspecies)].copy()
df_sub = df_sub.loc[df_sub.View.isin(view)]
df_sub = df_sub.loc[df_sub.Sex.isin(sex)]
df_sub = df_sub.loc[df_sub.hybrid_stat.isin(hybrid)]
df_sub = df_sub.loc[df_sub.Hybrid_stat.isin(hybrid)]

num_entries = len(df_sub)
# Filter out any entries that have missing filenames or URLs:
Expand Down
24 changes: 16 additions & 8 deletions dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,21 @@ def parse_contents(contents, filename):
# If no image urls, disable sample image options
mapping = True
img_urls = True
features = ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename']
features = ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename']
included_features = []
df.columns = df.columns.str.capitalize()
for feature in features:
if feature not in list(df.columns):
if feature == 'lat' or feature == 'lon':
mapping = False
elif feature == 'file_url':
if feature == 'Lat' or feature == 'Lon':
if feature == 'Lon':
if 'Long' not in list(df.columns):
mapping = False
else:
df = df.rename(columns = {"Long": "Lon"})
included_features.append('Lon')
else:
mapping = False
elif feature == 'File_url':
img_urls = False
elif feature == 'Image_filename':
# If 'Image_filename' missing, return missing column if 'file_url' is included.
Expand All @@ -102,10 +110,10 @@ def parse_contents(contents, filename):
if mapping:
try:
# Check lat and lon within appropriate ranges (lat: [-90, 90], lon: [-180, 180])
valid_lat = df['lat'].astype(float).between(-90, 90)
df.loc[~valid_lat, 'lat'] = 'unknown'
valid_lon = df['lon'].astype(float).between(-180, 180)
df.loc[~valid_lon, 'lon'] = 'unknown'
valid_lat = df['Lat'].astype(float).between(-90, 90)
df.loc[~valid_lat, 'Lat'] = 'unknown'
valid_lon = df['Lon'].astype(float).between(-180, 180)
df.loc[~valid_lon, 'Lon'] = 'unknown'
except ValueError as e:
print(e)
return json.dumps({'error': {'mapping': str(e)}})
Expand Down
11 changes: 11 additions & 0 deletions test_data/HCGSD_test_latLong.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
NHM_Specimen,Image_filename,View,Species,Subspecies,Sex,addit_taxa_info,type_stat,hybrid_stat,in_reduced,locality,lat,long,speciesdesig,file_url
10429021,10429021_V_lowres.png,,erato,notabilis,,f._notabilis,,subspecies synonym,1,,-1.583333333,-77.75,e. notabilis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
10428972,10428972_V_lowres.png,ventral,erato,petiverana,male,petiverana,,valid subspecies,1,Songolica (= Zongolica) MEX VC,18.66666667,-96.98333333,e. petiverana,
10429172,,ventral,,petiverana,male,petiverana,,valid subspecies,1,San Ramon NIC ZE,89,-84.68333333,e. petiverana,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
10428595,10428595_D_lowres.png,dorsal,erato,phyllis,male,f._phyllis,,subspecies synonym,1,Resistencia ARG CH,-27.45,-58.98333333,e. phyllis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/
10428140,10428140_V_lowres.png,ventral,,plesseni,male,plesseni,,valid subspecies,1,Banos ECD TU,-1.4,-74,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
10428250,10428250_V_lowres.png,ventral,melpomene,,male,ab._rubra,,subspecies synonym,1,Caradoc (Hda) PER CU,-13.36666667,-70.95,m. schunkei,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
10427979,,dorsal,melpomene,rosina_S,male,rosina_S,,valid subspecies,1,Turrialba CRI CA,9.883333333,-83.63333333,m. rosina,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/
10428803,10428803_D_lowres.png,dorsal,erato,guarica,female,guarica,,valid subspecies,1,Fusagasuga COL CN,4.35,-74.36666667,e. guarica,
10428169,10428169_V_lowres.png,ventral,melpomene,plesseni,male,f._pura,ST,subspecies synonym,1,Canelos ECD PA,-1.583333333,73,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/
10428321,10428321_D_lowres.png,,melpomene,nanna,male,nanna,ST,valid subspecies,1,Espirito Santo BRA ES,-20.33333333,-40.28333333,m. nanna,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/
2 changes: 1 addition & 1 deletion tests/components/test_divs.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_get_img_div():
'Subspecies': ['subspecies1', 'subspecies2', 'subspecies4'],
'View': ['ventral', 'ventral', 'dorsal'],
'Sex': ['male', 'female', 'female'],
'hybrid_stat': ['subspecies synonym', 'valid subspecies', 'subspecies synonym']
'Hybrid_stat': ['subspecies synonym', 'valid subspecies', 'subspecies synonym']
}
df = pd.DataFrame(data = data)

Expand Down
4 changes: 3 additions & 1 deletion tests/components/test_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

# Define test data
df = pd.read_csv("test_data/HCGSD_full_testNA.csv")
included_features = ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename']
# Update columns since not running through parse
df.columns = df.columns.str.capitalize()
included_features = ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename']
processed_df, cat_list = get_data(df, True, included_features)

def test_make_hist_plot():
Expand Down
17 changes: 8 additions & 9 deletions tests/components/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,23 @@ def test_get_data(self):
data = {
'Species': ['melpomene', 'melpomene', 'erato', 'melpomene', 'erato', 'species3'],
'Subspecies': ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', None],
'lat': [-13.43, 5.25, 5.25, 9.9, 5.25, 9.9],
'lon': [-70.38, -55.25, -55.25, -83.73, -55.25, -55.25]
'Lat': [-13.43, 5.25, 5.25, 9.9, 5.25, 9.9],
'Lon': [-70.38, -55.25, -55.25, -83.73, -55.25, -55.25]
}
cat_list = [{'label': 'Species', 'value': 'Species'},
{'label': 'Subspecies', 'value': 'Subspecies'},
{'label':'View', 'value': 'View'},
{'label': 'Sex', 'value': 'Sex'},
{'label': 'Hybrid Status', 'value':'hybrid_stat'},
{'label': 'Locality', 'value': 'locality'}]
features = ['Species', 'Subspecies', 'lat', 'lon']
{'label': 'Hybrid Status', 'value':'Hybrid_stat'},
{'label': 'Locality', 'value': 'Locality'}]
features = ['Species', 'Subspecies', 'Lat', 'Lon']
locality = ['-13.43|-70.38', '5.25|-55.25', '5.25|-55.25', '9.9|-83.73','5.25|-55.25', '9.9|-55.25']

# Test with mapping = True (location data)
df = pd.DataFrame(data = data)
result_df, result_list = get_data(df, True, features)
self.assertEqual(result_df['lat-lon'].tolist(), locality)
self.assertEqual(result_df['locality'].tolist(), locality)
self.assertEqual(result_df['Locality'].tolist(), locality)
self.assertEqual(result_df["Samples_at_locality"].tolist(), [1,3,3,1,3,1])
self.assertEqual(result_df["Species_at_locality"].tolist(), ['melpomene', 'melpomene, erato', 'melpomene, erato', 'melpomene', 'melpomene, erato', 'species3'])
self.assertEqual(result_df["Subspecies_at_locality"].tolist(), ['schunkei', 'nanna, erato, guarica', 'nanna, erato, guarica', 'rosina_N', 'nanna, erato, guarica', 'unknown'])
Expand All @@ -48,8 +48,7 @@ def test_get_data(self):
# Test with mapping = False (no location data)
df2 = pd.DataFrame(data = {key: data[key] for key in ['Species', 'Subspecies']})
result_df2, result2_list = get_data(df2, False, features[:2])
#self.assertEqual('locality' not in result_df2.columns, True)
self.assertEqual(result_df2['locality'].tolist(), ['unknown' for i in range(len(locality))])
self.assertEqual(result_df2['Locality'].tolist(), ['unknown' for i in range(len(locality))])
self.assertEqual(result_df2["Species"].tolist(), ['melpomene', 'melpomene', 'erato', 'melpomene', 'erato', 'species3'])
self.assertEqual(result_df2["Subspecies"].tolist(), ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', 'unknown'])
self.assertEqual(result2_list, cat_list)
Expand All @@ -62,7 +61,7 @@ def test_get_filenames(self):
'Subspecies': ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', 'subspecies6', 'subspecies6'],
'View': ['ventral', 'ventral', 'ventral', 'dorsal', 'dorsal', 'ventral', 'dorsal'],
'Sex': ['male', 'female', 'female', 'male', 'female', 'male', 'female'],
'hybrid_stat': ['subspecies synonym',
'Hybrid_stat': ['subspecies synonym',
'valid subspecies',
'subspecies synonym',
'valid subspecies',
Expand Down
2 changes: 1 addition & 1 deletion tests/test_app_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from dashboard import update_dist_view, update_dist_plot, update_pie_plot, set_subspecies_options, update_display

# Define test data
data = {'processed_df': '{"columns":["Species","Subspecies","View","Sex","hybrid_stat","lat","lon","lat-lon","Samples_at_locality","Species_at_locality","Subspecies_at_locality"],"index":[0,1,2,3,4,5,6,7,8,9],"data":[["erato","notabilis","unknown","unknown","subspecies synonym",-1.583333333,-77.75,"-1.583333333|-77.75",1,"erato","notabilis"],["erato","petiverana","ventral","male","valid subspecies",18.66666667,-96.98333333,"18.66666667|-96.98333333",1,"erato","petiverana"],["unknown","petiverana","ventral","male","valid subspecies","unknown",-84.68333333,"unknown|-84.68333333",1,"unknown","petiverana"],["erato","phyllis","dorsal","male","subspecies synonym",-27.45,-58.98333333,"-27.45|-58.98333333",1,"erato","phyllis"],["unknown","plesseni","ventral","male","valid subspecies",-1.4,"unknown","-1.4|unknown",1,"unknown","plesseni"],["melpomene","unknown","ventral","male","subspecies synonym",-13.36666667,-70.95,"-13.36666667|-70.95",1,"melpomene","unknown"],["melpomene","rosina_S","dorsal","male","valid subspecies",9.883333333,-83.63333333,"9.883333333|-83.63333333",1,"melpomene","rosina_S"],["erato","guarica","dorsal","female","valid subspecies",4.35,-74.36666667,"4.35|-74.36666667",1,"erato","guarica"],["melpomene","plesseni","ventral","male","subspecies synonym",-1.583333333,"unknown","-1.583333333|unknown",1,"melpomene","plesseni"],["melpomene","nanna","unknown","male","valid subspecies",-20.33333333,-40.28333333,"-20.33333333|-40.28333333",1,"melpomene","nanna"]]}',
data = {'processed_df': '{"columns":["Species","Subspecies","View","Sex","Hybrid_stat","Lat","Lon","lat-lon","Samples_at_locality","Species_at_locality","Subspecies_at_locality"],"index":[0,1,2,3,4,5,6,7,8,9],"data":[["erato","notabilis","unknown","unknown","subspecies synonym",-1.583333333,-77.75,"-1.583333333|-77.75",1,"erato","notabilis"],["erato","petiverana","ventral","male","valid subspecies",18.66666667,-96.98333333,"18.66666667|-96.98333333",1,"erato","petiverana"],["unknown","petiverana","ventral","male","valid subspecies","unknown",-84.68333333,"unknown|-84.68333333",1,"unknown","petiverana"],["erato","phyllis","dorsal","male","subspecies synonym",-27.45,-58.98333333,"-27.45|-58.98333333",1,"erato","phyllis"],["unknown","plesseni","ventral","male","valid subspecies",-1.4,"unknown","-1.4|unknown",1,"unknown","plesseni"],["melpomene","unknown","ventral","male","subspecies synonym",-13.36666667,-70.95,"-13.36666667|-70.95",1,"melpomene","unknown"],["melpomene","rosina_S","dorsal","male","valid subspecies",9.883333333,-83.63333333,"9.883333333|-83.63333333",1,"melpomene","rosina_S"],["erato","guarica","dorsal","female","valid subspecies",4.35,-74.36666667,"4.35|-74.36666667",1,"erato","guarica"],["melpomene","plesseni","ventral","male","subspecies synonym",-1.583333333,"unknown","-1.583333333|unknown",1,"melpomene","plesseni"],["melpomene","nanna","unknown","male","valid subspecies",-20.33333333,-40.28333333,"-20.33333333|-40.28333333",1,"melpomene","nanna"]]}',
'all_species': {'Erato': ['Any-Erato', 'notabilis', 'petiverana', 'phyllis', 'guarica'], 'Unknown': ['Any-Unknown', 'petiverana', 'plesseni'], 'Melpomene': ['Any-Melpomene', 'unknown', 'rosina_S', 'plesseni', 'nanna'], 'Any': ['Any', 'notabilis', 'petiverana', 'phyllis', 'plesseni', 'unknown', 'rosina_S', 'guarica', 'nanna']},
'mapping': True,
'images': True}
Expand Down
Loading