diff --git a/README.md b/README.md index 4066de4..0e12781 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,15 @@ For full dashboard functionality, upload a CSV or XLS file with the following co - `View`: View of the sample (eg., 'ventral' or 'dorsal' for butterflies). - `Sex`: Sex of each sample. - `hybrid_stat`: Hybrid status of each sample (eg., 'valid_subspecies', 'subspecies_synonym', or 'unknown'). -- `lat`*: Latitude at which image was taken or specimen was collected. -- `lon`*: Longitude at which image was taken or specimen was collected. +- `lat`*: Latitude at which image was taken or specimen was collected: number in [-90,90]. +- `lon`*: Longitude at which image was taken or specimen was collected: number in [-180,180]. `long` will also be accepted. - `file_url`*: URL to access file. ***Note:** -- `lat` and `lon` columns are not required to utilize the dashboard, but there will be no map view if they are not included. +- Column names are **not** case-sensitive. +- `lat` and `lon` columns are not required to utilize the dashboard, but there will be no map view if they are not included. Blank (or null) entries are recorded as `unknown`, and thus excluded from map view. - `Image_filename` and `file_url` are not required, but there will be no sample images option if either one is not included. +- `locality` may be provided, otherwise it will take on the value `lat|lon` or `unknown` if these are not provided. ## Running Dashboard diff --git a/components/divs.py b/components/divs.py index 80df309..e4e58e8 100644 --- a/components/divs.py +++ b/components/divs.py @@ -3,7 +3,7 @@ # Fixed styles and sorting options H1_STYLE = {'textAlign': 'center', 'color': 'MidnightBlue'} H4_STYLE = {'color': 'MidnightBlue', 'margin-bottom' : 10} -HALF_DIV_STYLE = {'width': '48%', 'display': 'inline-block'} +HALF_DIV_STYLE = {'height': '48%', 'width': '48%', 'display': 'inline-block'} QUARTER_DIV_STYLE = {'width': '24%', 'display': 'inline-block'} BUTTON_STYLE = {'color': 'MidnightBlue', 'background-color': 'BlanchedAlmond', @@ -18,10 +18,14 @@ {'label': 'Subspecies', 'value': 'Subspecies'}, {'label':'View', 'value': 'View'}, {'label': 'Sex', 'value': 'Sex'}, - {'label': 'Hybrid Status', 'value':'hybrid_stat'}, - {'label': 'Locality', 'value': 'locality'} + {'label': 'Hybrid Status', 'value':'Hybrid_stat'}, + {'label': 'Locality', 'value': 'Locality'} ] DOCS_URL = "https://github.com/Imageomics/dashboard-prototype#how-it-works" +DOCS_LINK = html.A("documentation", + href=DOCS_URL, + target='_blank', + style = ERROR_STYLE) def get_hist_div(mapping): ''' @@ -124,6 +128,12 @@ def get_map_div(): ), html.Div([ + html.H4(''' + Note: Manual zooming may be required to view all points; the map focuses on the centroid of the data. + ''', + id = 'x-variable', #label to avoid nonexistent callback variable + style = {'color': 'MidnightBlue', 'margin-left': 20, 'margin-right': 20} + ) ], id = 'sort-by', #label sort-by box to avoid non-existent label and generate box so button doesn't move between views style = HALF_DIV_STYLE @@ -192,8 +202,8 @@ def get_img_div(df, all_species, img_url): style = QUARTER_DIV_STYLE ), html.Div([ - dcc.Checklist(df.hybrid_stat.unique(), - df.hybrid_stat.unique()[0:2], + dcc.Checklist(df.Hybrid_stat.unique(), + df.Hybrid_stat.unique()[0:2], id = 'hybrid?')], style = QUARTER_DIV_STYLE ), @@ -267,7 +277,10 @@ def get_main_div(hist_div, img_div): # Graphs - Distribution (histogram or map), then pie chart html.Div([ - dcc.Graph(id = 'dist-plot')], style = HALF_DIV_STYLE), + dcc.Loading(id = 'dist-plot-loading', + type = "circle", + color = 'DarkMagenta', + children = dcc.Graph(id = 'dist-plot'))], style = HALF_DIV_STYLE), html.Div([ dcc.Graph(id = 'pie-plot')], style = HALF_DIV_STYLE), @@ -303,20 +316,24 @@ def get_error_div(error_dict): html.H3("Source data does not have '" + feature + "' column. ", style = ERROR_STYLE), html.H4(["Please see the ", - html.A("documentation", - href=DOCS_URL, - target='_blank', - style = ERROR_STYLE), + DOCS_LINK, " for list of required columns."], style = ERROR_STYLE) ]) + elif 'mapping' in error_dict.keys(): + error_msg = error_dict['mapping'] + error_div = html.Div([ + html.H4("Latitude or longitude columns have non-numeric values: " + error_msg + ".", + style = ERROR_STYLE), + html.H4(["Please see the ", + DOCS_LINK, + "."], + style = ERROR_STYLE) + ]) elif 'type' in error_dict.keys(): error_div = html.Div([ html.H4(["The source file is not a valid CSV format, please see the ", - html.A("documentation", - href=DOCS_URL, - target='_blank', - style = ERROR_STYLE), + DOCS_LINK, "."], style = ERROR_STYLE) ]) diff --git a/components/graphs.py b/components/graphs.py index a1def62..e54fef0 100644 --- a/components/graphs.py +++ b/components/graphs.py @@ -26,7 +26,14 @@ def make_hist_plot(df, x_var, color_by, sort_by): color = color_by, color_discrete_sequence = px.colors.qualitative.Bold).update_xaxes(categoryorder = sort_by) - fig.update_layout(title = {'text': f'Distribution of {x_var} Colored by {color_by}'}) + fig.update_layout(title = {'text': f'Distribution of {x_var} Colored by {color_by}'}, + font = {'size': 16}, + margin = { + 'l': 30, + 'r': 20, + 't': 35, + 'b': 20 + }) return fig @@ -46,22 +53,17 @@ def make_map(df, color_by): df = df.copy() # only use entries that have valid lat & lon for mapping df = df.loc[df['lat-lon'].str.contains('unknown') == False] - fig = px.scatter_geo(df, - lat = df.lat, - lon = df.lon, - projection = "natural earth", + fig = px.scatter_mapbox(df, + lat = "Lat", + lon = "Lon", + #projection = "natural earth", custom_data = ["Samples_at_locality", "Species_at_locality", "Subspecies_at_locality"], - size = df.Samples_at_locality, + size = "Samples_at_locality", color = color_by, color_discrete_sequence = px.colors.qualitative.Bold, - title = "Distribution of Samples") - - fig.update_geos(fitbounds = "locations", - showcountries = True, countrycolor = "Grey", - showrivers = True, - showlakes = True, - showland = True, landcolor = "wheat", - showocean = True, oceancolor = "LightBlue") + title = "Distribution of Samples", + zoom = 1, + mapbox_style = "white-bg") fig.update_traces(hovertemplate = "Latitude: %{lat}
"+ @@ -71,6 +73,24 @@ def make_map(df, color_by): "Subspecies at lat/lon: %{customdata[2]}
" ) + fig.update_layout( + font = {'size': 16}, + margin = { + 'l': 20, + 'r': 20, + 't': 35, + 'b': 20 + }, + mapbox_layers = [{ + "below": "traces", + "sourcetype": "raster", + "sourceattribution": "Esri, Maxar, Earthstar Geographics, and the GIS User Community", + "source": ["https://services.arcgisonline.com/arcgis/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}"] + # Usage and Licensing (ArcGIS World Imagery): https://services.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer + # Style: https://roblabs.com/xyz-raster-sources/styles/arcgis-world-imagery.json + }] + ) + return fig def make_pie_plot(df, var): @@ -97,6 +117,13 @@ def make_pie_plot(df, var): color_discrete_sequence = px.colors.qualitative.Bold) pie_fig.update_traces(textposition = 'inside', textinfo = 'percent+label') - pie_fig.update_layout(title = {'text': f'Percentage Breakdown of {var}'}) + pie_fig.update_layout(title = {'text': f'Percentage Breakdown of {var}'}, + font = {'size': 16}, + margin = { + 'l': 20, + 'r': 20, + 't': 35, + 'b': 20 + }) return pie_fig diff --git a/components/query.py b/components/query.py index dbf57b3..d4e4813 100644 --- a/components/query.py +++ b/components/query.py @@ -17,7 +17,7 @@ def get_data(df, mapping, features): df - DataFrame of the data to visualize. mapping - Boolean. True when lat/lon are given in dataset. features - List of features (columns) included in the DataFrame. This is a subset of the suggested columns: - 'Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename' + 'Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename' Returns: -------- @@ -29,24 +29,24 @@ def get_data(df, mapping, features): # Will likely choose to calculate and return this in later instance cat_list = [{'label': 'Species', 'value': 'Species'}, {'label': 'Subspecies', 'value': 'Subspecies'}, - {'label':'View', 'value': 'View'}, + {'label': 'View', 'value': 'View'}, {'label': 'Sex', 'value': 'Sex'}, - {'label': 'Hybrid Status', 'value':'hybrid_stat'}, - {'label': 'Locality', 'value': 'locality'} + {'label': 'Hybrid Status', 'value':'Hybrid_stat'}, + {'label': 'Locality', 'value': 'Locality'} ] df = df.copy() df = df.fillna('unknown') - features.append('locality') + features.append('Locality') # If we don't have lat/lon, just return DataFrame with otherwise required features. if not mapping: - if 'locality' not in df.columns: - df['locality'] = 'unknown' + if 'Locality' not in df.columns: + df['Locality'] = 'unknown' return df[features], cat_list # else lat and lon are in dataset, so process locality information - df['lat-lon'] = df['lat'].astype(str) + '|' + df['lon'].astype(str) + df['lat-lon'] = df['Lat'].astype(str) + '|' + df['Lon'].astype(str) df["Samples_at_locality"] = df['lat-lon'].map(df['lat-lon'].value_counts()) # will duplicate if multiple views of same sample # Count and record number of species and subspecies at each lat-lon @@ -56,8 +56,8 @@ def get_data(df, mapping, features): df.loc[df['lat-lon'] == lat_lon, "Species_at_locality"] = ", ".join(species_list) df.loc[df['lat-lon'] == lat_lon, "Subspecies_at_locality"] = ", ".join(subspecies_list) - if 'locality' not in df.columns: - df['locality'] = df['lat-lon'] # contains "unknown" if lat or lon null + if 'Locality' not in df.columns: + df['Locality'] = df['lat-lon'] # contains "unknown" if lat or lon null new_features = ['lat-lon', "Samples_at_locality", "Species_at_locality", "Subspecies_at_locality"] for feature in new_features: @@ -157,12 +157,12 @@ def get_filenames(df, subspecies, view, sex, hybrid, num_images): df_sub = df.loc[df.Subspecies.isin(subspecies)].copy() df_sub = df_sub.loc[df_sub.View.isin(view)] df_sub = df_sub.loc[df_sub.Sex.isin(sex)] - df_sub = df_sub.loc[df_sub.hybrid_stat.isin(hybrid)] + df_sub = df_sub.loc[df_sub.Hybrid_stat.isin(hybrid)] num_entries = len(df_sub) # Filter out any entries that have missing filenames or URLs: df_sub = df_sub.loc[df_sub.Image_filename != 'unknown'] - df_sub = df_sub.loc[df_sub.file_url != 'unknown'] + df_sub = df_sub.loc[df_sub.File_url != 'unknown'] max_imgs = len(df_sub) missing_vals = num_entries - max_imgs if max_imgs > 0: @@ -172,7 +172,7 @@ def get_filenames(df, subspecies, view, sex, hybrid, num_images): num = min(num_images, max_imgs) df_filtered = df_sub.sample(num) filenames = df_filtered.Image_filename.astype('string').values - filepaths = df_filtered.file_url.astype('string').values + filepaths = df_filtered.File_url.astype('string').values #return list of filenames for min(user-selected, available) images randomly selected images from the filtered dataset return list(filenames), list(filepaths) # If there aren't any images to display, check if there are no such entries or just missing information. diff --git a/dashboard.py b/dashboard.py index 933a745..9871148 100644 --- a/dashboard.py +++ b/dashboard.py @@ -1,4 +1,5 @@ import pandas as pd +import numpy as np import base64 import io import json @@ -80,13 +81,21 @@ def parse_contents(contents, filename): # If no image urls, disable sample image options mapping = True img_urls = True - features = ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename'] + features = ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename'] included_features = [] + df.columns = df.columns.str.capitalize() for feature in features: if feature not in list(df.columns): - if feature == 'lat' or feature == 'lon': - mapping = False - elif feature == 'file_url': + if feature == 'Lat' or feature == 'Lon': + if feature == 'Lon': + if 'Long' not in list(df.columns): + mapping = False + else: + df = df.rename(columns = {"Long": "Lon"}) + included_features.append('Lon') + else: + mapping = False + elif feature == 'File_url': img_urls = False elif feature == 'Image_filename': # If 'Image_filename' missing, return missing column if 'file_url' is included. @@ -97,6 +106,18 @@ def parse_contents(contents, filename): else: included_features.append(feature) + # Check for lat/lon bounds & type if columns exist + if mapping: + try: + # Check lat and lon within appropriate ranges (lat: [-90, 90], lon: [-180, 180]) + valid_lat = df['Lat'].astype(float).between(-90, 90) + df.loc[~valid_lat, 'Lat'] = 'unknown' + valid_lon = df['Lon'].astype(float).between(-180, 180) + df.loc[~valid_lon, 'Lon'] = 'unknown' + except ValueError as e: + print(e) + return json.dumps({'error': {'mapping': str(e)}}) + # get dataset-determined static data: # the dataframe and categorical features - processed for map view if mapping is True # all possible species, subspecies diff --git a/dashboard_preview_hist.png b/dashboard_preview_hist.png index e19f557..fbafb71 100644 Binary files a/dashboard_preview_hist.png and b/dashboard_preview_hist.png differ diff --git a/dashboard_preview_map.png b/dashboard_preview_map.png index e6d11fd..888aede 100644 Binary files a/dashboard_preview_map.png and b/dashboard_preview_map.png differ diff --git a/test_data/HCGSD_test_latLonOOB.csv b/test_data/HCGSD_test_latLonOOB.csv new file mode 100644 index 0000000..9cb0a65 --- /dev/null +++ b/test_data/HCGSD_test_latLonOOB.csv @@ -0,0 +1,11 @@ +NHM_Specimen,Image_filename,View,Species,Subspecies,Sex,addit_taxa_info,type_stat,hybrid_stat,in_reduced,locality,lat,lon,speciesdesig,file_url +10429021,10429021_V_lowres.png,,erato,notabilis,,f._notabilis,,subspecies synonym,1,,-1.583333333,-77.75,e. notabilis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428972,10428972_V_lowres.png,ventral,erato,petiverana,male,petiverana,,valid subspecies,1,Songolica (= Zongolica) MEX VC,18.66666667,-96.98333333,e. petiverana, +10429172,,ventral,,petiverana,male,petiverana,,valid subspecies,1,San Ramon NIC ZE,92,-84.68333333,e. petiverana,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428595,10428595_D_lowres.png,dorsal,erato,phyllis,male,f._phyllis,,subspecies synonym,1,Resistencia ARG CH,-27.45,-58.98333333,e. phyllis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ +10428140,10428140_V_lowres.png,ventral,,plesseni,male,plesseni,,valid subspecies,1,Banos ECD TU,-1.4,-740,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428250,10428250_V_lowres.png,ventral,melpomene,,male,ab._rubra,,subspecies synonym,1,Caradoc (Hda) PER CU,-13.36666667,-70.95,m. schunkei,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10427979,,dorsal,melpomene,rosina_S,male,rosina_S,,valid subspecies,1,Turrialba CRI CA,9.883333333,-83.63333333,m. rosina,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ +10428803,10428803_D_lowres.png,dorsal,erato,guarica,female,guarica,,valid subspecies,1,Fusagasuga COL CN,4.35,-74.36666667,e. guarica, +10428169,10428169_V_lowres.png,ventral,melpomene,plesseni,male,f._pura,ST,subspecies synonym,1,Canelos ECD PA,-1.583333333,730,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428321,10428321_D_lowres.png,,melpomene,nanna,male,nanna,ST,valid subspecies,1,Espirito Santo BRA ES,-20.33333333,-40.28333333,m. nanna,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ \ No newline at end of file diff --git a/test_data/HCGSD_test_latLong.csv b/test_data/HCGSD_test_latLong.csv new file mode 100644 index 0000000..bf88cbc --- /dev/null +++ b/test_data/HCGSD_test_latLong.csv @@ -0,0 +1,11 @@ +NHM_Specimen,Image_filename,View,Species,Subspecies,Sex,addit_taxa_info,type_stat,hybrid_stat,in_reduced,locality,lat,long,speciesdesig,file_url +10429021,10429021_V_lowres.png,,erato,notabilis,,f._notabilis,,subspecies synonym,1,,-1.583333333,-77.75,e. notabilis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428972,10428972_V_lowres.png,ventral,erato,petiverana,male,petiverana,,valid subspecies,1,Songolica (= Zongolica) MEX VC,18.66666667,-96.98333333,e. petiverana, +10429172,,ventral,,petiverana,male,petiverana,,valid subspecies,1,San Ramon NIC ZE,89,-84.68333333,e. petiverana,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428595,10428595_D_lowres.png,dorsal,erato,phyllis,male,f._phyllis,,subspecies synonym,1,Resistencia ARG CH,-27.45,-58.98333333,e. phyllis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ +10428140,10428140_V_lowres.png,ventral,,plesseni,male,plesseni,,valid subspecies,1,Banos ECD TU,-1.4,-74,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428250,10428250_V_lowres.png,ventral,melpomene,,male,ab._rubra,,subspecies synonym,1,Caradoc (Hda) PER CU,-13.36666667,-70.95,m. schunkei,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10427979,,dorsal,melpomene,rosina_S,male,rosina_S,,valid subspecies,1,Turrialba CRI CA,9.883333333,-83.63333333,m. rosina,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ +10428803,10428803_D_lowres.png,dorsal,erato,guarica,female,guarica,,valid subspecies,1,Fusagasuga COL CN,4.35,-74.36666667,e. guarica, +10428169,10428169_V_lowres.png,ventral,melpomene,plesseni,male,f._pura,ST,subspecies synonym,1,Canelos ECD PA,-1.583333333,73,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428321,10428321_D_lowres.png,,melpomene,nanna,male,nanna,ST,valid subspecies,1,Espirito Santo BRA ES,-20.33333333,-40.28333333,m. nanna,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ \ No newline at end of file diff --git a/test_data/HCGSD_test_nonnumeric.csv b/test_data/HCGSD_test_nonnumeric.csv new file mode 100644 index 0000000..2cc8cc2 --- /dev/null +++ b/test_data/HCGSD_test_nonnumeric.csv @@ -0,0 +1,11 @@ +NHM_Specimen,Image_filename,View,Species,Subspecies,Sex,addit_taxa_info,type_stat,hybrid_stat,in_reduced,locality,lat,lon,speciesdesig,file_url +10429021,10429021_V_lowres.png,,erato,notabilis,,f._notabilis,,subspecies synonym,1,,-1.583333333,-77.75,e. notabilis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428972,10428972_V_lowres.png,ventral,erato,petiverana,male,petiverana,,valid subspecies,1,Songolica (= Zongolica) MEX VC,18.66666667,,e. petiverana, +10429172,,ventral,,petiverana,male,petiverana,,valid subspecies,1,San Ramon NIC ZE,92,-84.68333333,e. petiverana,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428595,10428595_D_lowres.png,dorsal,erato,phyllis,male,f._phyllis,,subspecies synonym,1,Resistencia ARG CH,-27.45,-58.98333333,e. phyllis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ +10428140,10428140_V_lowres.png,ventral,,plesseni,male,plesseni,,valid subspecies,1,Banos ECD TU,,Non numeric,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428250,10428250_V_lowres.png,ventral,melpomene,,male,ab._rubra,,subspecies synonym,1,Caradoc (Hda) PER CU,-13.36666667,-70.95,m. schunkei,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10427979,,dorsal,melpomene,rosina_S,male,rosina_S,,valid subspecies,1,Turrialba CRI CA,Non numeric,-83.63333333,m. rosina,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ +10428803,10428803_D_lowres.png,dorsal,erato,guarica,female,guarica,,valid subspecies,1,Fusagasuga COL CN,4.35,-74.36666667,e. guarica, +10428169,10428169_V_lowres.png,ventral,melpomene,plesseni,male,f._pura,ST,subspecies synonym,1,Canelos ECD PA,-1.583333333,,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428321,10428321_D_lowres.png,,melpomene,nanna,male,nanna,ST,valid subspecies,1,Espirito Santo BRA ES,-20.33333333,-40.28333333,m. nanna,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ \ No newline at end of file diff --git a/tests/components/test_divs.py b/tests/components/test_divs.py index cf162a4..28341cc 100644 --- a/tests/components/test_divs.py +++ b/tests/components/test_divs.py @@ -29,7 +29,7 @@ def test_get_img_div(): 'Subspecies': ['subspecies1', 'subspecies2', 'subspecies4'], 'View': ['ventral', 'ventral', 'dorsal'], 'Sex': ['male', 'female', 'female'], - 'hybrid_stat': ['subspecies synonym', 'valid subspecies', 'subspecies synonym'] + 'Hybrid_stat': ['subspecies synonym', 'valid subspecies', 'subspecies synonym'] } df = pd.DataFrame(data = data) diff --git a/tests/components/test_graphs.py b/tests/components/test_graphs.py index 7f6cd88..1f2d4bf 100644 --- a/tests/components/test_graphs.py +++ b/tests/components/test_graphs.py @@ -4,7 +4,9 @@ # Define test data df = pd.read_csv("test_data/HCGSD_full_testNA.csv") -included_features = ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', 'file_url', 'Image_filename'] +# Update columns since not running through parse +df.columns = df.columns.str.capitalize() +included_features = ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', 'File_url', 'Image_filename'] processed_df, cat_list = get_data(df, True, included_features) def test_make_hist_plot(): @@ -25,7 +27,7 @@ def test_make_map(): # Map plot output output = make_map(processed_df, "Species") output_data = output['data', 0] - assert output_data.type == "scattergeo" + assert output_data.type == "scattermapbox" #test for uknowns in data and check it's proper type assert 'unknown' not in output_data['customdata'] diff --git a/tests/components/test_query.py b/tests/components/test_query.py index 9d8eb67..7be5180 100644 --- a/tests/components/test_query.py +++ b/tests/components/test_query.py @@ -23,23 +23,23 @@ def test_get_data(self): data = { 'Species': ['melpomene', 'melpomene', 'erato', 'melpomene', 'erato', 'species3'], 'Subspecies': ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', None], - 'lat': [-13.43, 5.25, 5.25, 9.9, 5.25, 9.9], - 'lon': [-70.38, -55.25, -55.25, -83.73, -55.25, -55.25] + 'Lat': [-13.43, 5.25, 5.25, 9.9, 5.25, 9.9], + 'Lon': [-70.38, -55.25, -55.25, -83.73, -55.25, -55.25] } cat_list = [{'label': 'Species', 'value': 'Species'}, {'label': 'Subspecies', 'value': 'Subspecies'}, {'label':'View', 'value': 'View'}, {'label': 'Sex', 'value': 'Sex'}, - {'label': 'Hybrid Status', 'value':'hybrid_stat'}, - {'label': 'Locality', 'value': 'locality'}] - features = ['Species', 'Subspecies', 'lat', 'lon'] + {'label': 'Hybrid Status', 'value':'Hybrid_stat'}, + {'label': 'Locality', 'value': 'Locality'}] + features = ['Species', 'Subspecies', 'Lat', 'Lon'] locality = ['-13.43|-70.38', '5.25|-55.25', '5.25|-55.25', '9.9|-83.73','5.25|-55.25', '9.9|-55.25'] # Test with mapping = True (location data) df = pd.DataFrame(data = data) result_df, result_list = get_data(df, True, features) self.assertEqual(result_df['lat-lon'].tolist(), locality) - self.assertEqual(result_df['locality'].tolist(), locality) + self.assertEqual(result_df['Locality'].tolist(), locality) self.assertEqual(result_df["Samples_at_locality"].tolist(), [1,3,3,1,3,1]) self.assertEqual(result_df["Species_at_locality"].tolist(), ['melpomene', 'melpomene, erato', 'melpomene, erato', 'melpomene', 'melpomene, erato', 'species3']) self.assertEqual(result_df["Subspecies_at_locality"].tolist(), ['schunkei', 'nanna, erato, guarica', 'nanna, erato, guarica', 'rosina_N', 'nanna, erato, guarica', 'unknown']) @@ -48,8 +48,7 @@ def test_get_data(self): # Test with mapping = False (no location data) df2 = pd.DataFrame(data = {key: data[key] for key in ['Species', 'Subspecies']}) result_df2, result2_list = get_data(df2, False, features[:2]) - #self.assertEqual('locality' not in result_df2.columns, True) - self.assertEqual(result_df2['locality'].tolist(), ['unknown' for i in range(len(locality))]) + self.assertEqual(result_df2['Locality'].tolist(), ['unknown' for i in range(len(locality))]) self.assertEqual(result_df2["Species"].tolist(), ['melpomene', 'melpomene', 'erato', 'melpomene', 'erato', 'species3']) self.assertEqual(result_df2["Subspecies"].tolist(), ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', 'unknown']) self.assertEqual(result2_list, cat_list) @@ -62,7 +61,7 @@ def test_get_filenames(self): 'Subspecies': ['schunkei', 'nanna', 'erato', 'rosina_N', 'guarica', 'subspecies6', 'subspecies6'], 'View': ['ventral', 'ventral', 'ventral', 'dorsal', 'dorsal', 'ventral', 'dorsal'], 'Sex': ['male', 'female', 'female', 'male', 'female', 'male', 'female'], - 'hybrid_stat': ['subspecies synonym', + 'Hybrid_stat': ['subspecies synonym', 'valid subspecies', 'subspecies synonym', 'valid subspecies', @@ -76,7 +75,7 @@ def test_get_filenames(self): '10428804_D_lowres.png', 'unknown', '10428723_V_lowres.png'], - 'file_url': [BASE_URL_V, + 'File_url': [BASE_URL_V, BASE_URL_V, BASE_URL_V, BASE_URL_D, diff --git a/tests/test_app_callbacks.py b/tests/test_app_callbacks.py index 0548729..bb7a44b 100644 --- a/tests/test_app_callbacks.py +++ b/tests/test_app_callbacks.py @@ -3,7 +3,7 @@ from dashboard import update_dist_view, update_dist_plot, update_pie_plot, set_subspecies_options, update_display # Define test data -data = {'processed_df': '{"columns":["Species","Subspecies","View","Sex","hybrid_stat","lat","lon","lat-lon","Samples_at_locality","Species_at_locality","Subspecies_at_locality"],"index":[0,1,2,3,4,5,6,7,8,9],"data":[["erato","notabilis","unknown","unknown","subspecies synonym",-1.583333333,-77.75,"-1.583333333|-77.75",1,"erato","notabilis"],["erato","petiverana","ventral","male","valid subspecies",18.66666667,-96.98333333,"18.66666667|-96.98333333",1,"erato","petiverana"],["unknown","petiverana","ventral","male","valid subspecies","unknown",-84.68333333,"unknown|-84.68333333",1,"unknown","petiverana"],["erato","phyllis","dorsal","male","subspecies synonym",-27.45,-58.98333333,"-27.45|-58.98333333",1,"erato","phyllis"],["unknown","plesseni","ventral","male","valid subspecies",-1.4,"unknown","-1.4|unknown",1,"unknown","plesseni"],["melpomene","unknown","ventral","male","subspecies synonym",-13.36666667,-70.95,"-13.36666667|-70.95",1,"melpomene","unknown"],["melpomene","rosina_S","dorsal","male","valid subspecies",9.883333333,-83.63333333,"9.883333333|-83.63333333",1,"melpomene","rosina_S"],["erato","guarica","dorsal","female","valid subspecies",4.35,-74.36666667,"4.35|-74.36666667",1,"erato","guarica"],["melpomene","plesseni","ventral","male","subspecies synonym",-1.583333333,"unknown","-1.583333333|unknown",1,"melpomene","plesseni"],["melpomene","nanna","unknown","male","valid subspecies",-20.33333333,-40.28333333,"-20.33333333|-40.28333333",1,"melpomene","nanna"]]}', +data = {'processed_df': '{"columns":["Species","Subspecies","View","Sex","Hybrid_stat","Lat","Lon","lat-lon","Samples_at_locality","Species_at_locality","Subspecies_at_locality"],"index":[0,1,2,3,4,5,6,7,8,9],"data":[["erato","notabilis","unknown","unknown","subspecies synonym",-1.583333333,-77.75,"-1.583333333|-77.75",1,"erato","notabilis"],["erato","petiverana","ventral","male","valid subspecies",18.66666667,-96.98333333,"18.66666667|-96.98333333",1,"erato","petiverana"],["unknown","petiverana","ventral","male","valid subspecies","unknown",-84.68333333,"unknown|-84.68333333",1,"unknown","petiverana"],["erato","phyllis","dorsal","male","subspecies synonym",-27.45,-58.98333333,"-27.45|-58.98333333",1,"erato","phyllis"],["unknown","plesseni","ventral","male","valid subspecies",-1.4,"unknown","-1.4|unknown",1,"unknown","plesseni"],["melpomene","unknown","ventral","male","subspecies synonym",-13.36666667,-70.95,"-13.36666667|-70.95",1,"melpomene","unknown"],["melpomene","rosina_S","dorsal","male","valid subspecies",9.883333333,-83.63333333,"9.883333333|-83.63333333",1,"melpomene","rosina_S"],["erato","guarica","dorsal","female","valid subspecies",4.35,-74.36666667,"4.35|-74.36666667",1,"erato","guarica"],["melpomene","plesseni","ventral","male","subspecies synonym",-1.583333333,"unknown","-1.583333333|unknown",1,"melpomene","plesseni"],["melpomene","nanna","unknown","male","valid subspecies",-20.33333333,-40.28333333,"-20.33333333|-40.28333333",1,"melpomene","nanna"]]}', 'all_species': {'Erato': ['Any-Erato', 'notabilis', 'petiverana', 'phyllis', 'guarica'], 'Unknown': ['Any-Unknown', 'petiverana', 'plesseni'], 'Melpomene': ['Any-Melpomene', 'unknown', 'rosina_S', 'plesseni', 'nanna'], 'Any': ['Any', 'notabilis', 'petiverana', 'phyllis', 'plesseni', 'unknown', 'rosina_S', 'guarica', 'nanna']}, 'mapping': True, 'images': True} @@ -34,7 +34,7 @@ def test_update_dist_plot_call(): # Map plot output output2 = update_dist_plot('Species', 'Subspecies', 'alpha', "Show Histogram", jsonified_data) - assert output2['data', 0].type == "scattergeo" + assert output2['data', 0].type == "scattermapbox" def test_update_pie_plot(): diff --git a/tests/test_filters.py b/tests/test_filters.py index 772fb1f..778b2be 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -19,15 +19,16 @@ def generate_mock_upload(filepath): contents = "".join([content_type, ",", content_string]) return contents +ALL_COLUMNS = ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', + 'File_url', 'Image_filename', 'Locality', 'lat-lon', + 'Samples_at_locality', 'Species_at_locality', 'Subspecies_at_locality'] # Define Test Cases test_cases = [ { # Check with full columns expected "filepath": "test_data/HCGSD_full_testNA.csv", "filename": "HCGSD_full_testNA.csv", - "expected_columns": ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', - 'file_url', 'Image_filename', 'locality', 'lat-lon', - 'Samples_at_locality', 'Species_at_locality', 'Subspecies_at_locality'], + "expected_columns": ALL_COLUMNS, "expected_mapping": True, "expected_images": True }, @@ -35,16 +36,16 @@ def generate_mock_upload(filepath): "filepath": "test_data/HCGSD_test_no_mapping.csv", "filename": "HCGSD_test_no_mapping.csv", # 'lon' in data, 'lat' not, 'lon' maintained - "expected_columns": ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lon', - 'locality'], + "expected_columns": ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lon', + 'Locality'], "expected_mapping": False, "expected_images": False }, { # Check with missing image URL information "filepath": "test_data/HCGSD_testNA.csv", "filename": "HCGSD_testNA.csv", - "expected_columns": ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', - 'Image_filename', 'locality', 'lat-lon', + "expected_columns": ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', 'Lon', + 'Image_filename', 'Locality', 'lat-lon', 'Samples_at_locality', 'Species_at_locality', 'Subspecies_at_locality'], "expected_mapping": True, "expected_images": False @@ -52,11 +53,25 @@ def generate_mock_upload(filepath): { # Check with just missing mapping information "filepath": "test_data/HCGSD_test_nolon.csv", "filename": "HCGSD_test_nolon.csv", - "expected_columns": ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', - 'file_url', 'Image_filename', 'locality'], + "expected_columns": ['Species', 'Subspecies', 'View', 'Sex', 'Hybrid_stat', 'Lat', + 'File_url', 'Image_filename', 'Locality'], "expected_mapping": False, "expected_images": True }, + { # Check with full columns expected, but lat/lon out of bounds (1 lat and 2 lon) + "filepath": "test_data/HCGSD_test_latLonOOB.csv", + "filename": "HCGSD_test_latLonOOB.csv", + "expected_columns": ALL_COLUMNS, + "expected_mapping": True, + "expected_images": True + }, + { # Check with full columns expected, but 'long' instead of 'lon' + "filepath": "test_data/HCGSD_test_latLong.csv", + "filename": "HCGSD_test_latLong.csv", + "expected_columns": ALL_COLUMNS, + "expected_mapping": True, + "expected_images": True + }, ] def test_parse_contents(): @@ -70,3 +85,7 @@ def test_parse_contents(): assert list(dff.columns) == case['expected_columns'] assert output['mapping'] == case['expected_mapping'] assert output['images'] == case['expected_images'] + + if case['filename'] == "HCGSD_test_latLonOOB.csv": + assert len(dff.loc[dff.Lat == 'unknown']) == 1 + assert len(dff.loc[dff.Lon == 'unknown']) == 2