Skip to content

Dev/neo4j 4 #172

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 10 additions & 15 deletions DEVELOP.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,50 +16,45 @@ cd docker && ./test-cpu-local.sh
```


## Local (non-docker) - DEPRECATED
### Install Git Checkout:
## Native - DEPRECATED
### Install Git Checkout - DEPRECATED

1. Remove any version installed with pip
`pip uninstall graphistry`
2. Install local git checkout
`./setup.py develop`

### Running Tests Locally
### Running Tests Locally - DEPRECATED

1. Install our test dependencies:`nose` and `mock`.
2. Run `nosetests` in the root pygraphistry folder (or `nose` or `nose2`).
3. `python setup.py test`
4. To duplicate CI tests, in python2 and 3, run ` time flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics`


### Via Docker (GPU)

1. "docker pull graphistry/graphistry-forge-base:v<latest>"
2. ./test-docker.sh

## CI

We intend to move to GitHub Actions / Docker Hub Automated Builds for CPU; the approach for GPU is TBD.

### Travis
### Travis - DEPRECATED

Travis CI automatically runs on every branch that has a Travis CI file. To configure, go to the [Travis CI account](https://travis-ci.org/graphistry/pygraphistry).

### Uninstall Git Checkout
### Native - Uninstall Git Checkout - DEPRECATED

Uninstall the local checkout (useful to rollback to packaged version) with `./setup.py develop --uninstall`

## Release Procedure: Merge, Tag, Package, & Upload
## Publish: Merge, Tag, & Upload

0. Merge the desired PR to master and switch to master head (`git checkout master && git pull`)
1. Merge the desired PR to master and switch to master head (`git checkout master && git pull`)

1. Tag the repository with a new version number. We use semantic version numbers of the form *X.Y.Z*.
2. Tag the repository with a new version number. We use semantic version numbers of the form *X.Y.Z*.

```sh
git tag X.Y.Z
git push --tags
```

2. Toggle version as active at [ReadTheDocs](https://readthedocs.org/projects/pygraphistry/versions/)
3. Toggle version as active at [ReadTheDocs](https://readthedocs.org/projects/pygraphistry/versions/)

3. Confirm PyPI picked up the [release](https://pypi.org/project/graphistry/)
4. Confirm PyPI picked up the [release](https://pypi.org/project/graphistry/)
2 changes: 1 addition & 1 deletion docker/test-cpu-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash

python -B -O -m pytest -v graphistry/tests $@
python -B -O -m pytest -vv graphistry/tests $@
2 changes: 1 addition & 1 deletion docker/test-cpu-local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Run tests using local mounts

#docker-compose build
docker-compose build

TEST_CPU_VERSION=${TEST_CPU_VERSION:-latest}

Expand Down
166 changes: 149 additions & 17 deletions graphistry/bolt_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import logging, pandas as pd
from datetime import datetime
from .pygraphistry import util

logger = logging.getLogger(__name__)

node_id_key = u'_bolt_node_id_key'
node_type_key = u'type'
node_label_prefix_key = u'_lbl_'
Expand All @@ -8,20 +12,14 @@
relationship_id_key = u'_bolt_relationship_id'
relationship_type_key = u'type'

def is_neotime(v):
    """Tell whether ``v`` reports ``'neotime'`` as its ``__module__``.

    Any failure while reading or comparing the attribute yields ``False``.
    """
    outcome = False
    try:
        outcome = (v.__module__ == 'neotime')
    except:  # noqa: E722 - kept broad on purpose: any lookup failure means "no"
        pass
    return outcome

t0 = datetime.min.time()

def stringify_neotimes(df):
#Otherwise currently encountering a toString error
try:
import neo4j
import neotime
df2 = df.copy()
for c in df.columns:
df2[c] = df[c].apply(lambda v: str(v) if is_neotime(v) else v)
return df2
except:
pass


def to_bolt_driver(driver=None):
if driver is None:
Expand All @@ -35,7 +33,6 @@ def to_bolt_driver(driver=None):
raise BoltSupportModuleNotFound()

def bolt_graph_to_edges_dataframe(graph):
import pandas as pd
df = pd.DataFrame([
util.merge_two_dicts(
{ key: value for (key, value) in relationship.items() },
Expand All @@ -48,11 +45,10 @@ def bolt_graph_to_edges_dataframe(graph):
)
for relationship in graph.relationships
])
return stringify_neotimes(df)
return neo_df_to_pd_df(df)


def bolt_graph_to_nodes_dataframe(graph):
import pandas as pd
def bolt_graph_to_nodes_dataframe(graph) -> pd.DataFrame:
df = pd.DataFrame([
util.merge_two_dicts(
{ key: value for (key, value) in node.items() },
Expand All @@ -64,7 +60,143 @@ def bolt_graph_to_nodes_dataframe(graph):
{ node_label_prefix_key + str(label): True for label in node.labels }))
for node in graph.nodes
])
return stringify_neotimes(df)
return neo_df_to_pd_df(df)


## Knowing a col is all-spatial, flatten into primitive cols
def flatten_spatial_col(df : pd.DataFrame, col : str) -> pd.DataFrame:
    """Expand a column of spatial values into primitive per-field columns.

    For each candidate field (``x``, ``y``, ``z``, ``srid``, ``longitude``,
    ``latitude``) a ``<col>_<field>`` column is added, holding that attribute
    of every value (``None`` values stay ``None``). A field is skipped
    entirely when reading it fails for any value, so only fields shared by
    all non-None values produce a column. Finally ``col`` itself is replaced
    by ``str()`` applied to each of its values.

    :param df: frame containing ``col``
    :param col: name of the column to flatten
    :returns: a shallow copy of ``df`` with the extra columns and the
        stringified ``col``
    """
    out_df = df.copy(deep=False)

    #TODO: Can we do better than duck typing the spatial fields?
    for field in ('x', 'y', 'z', 'srid', 'longitude', 'latitude'):
        try:
            out_df[f'{col}_{field}'] = df[col].apply(
                lambda v: None if v is None else getattr(v, field))
        except Exception:
            # Best effort: the values do not all expose this field, so no
            # column is created for it. Unlike a bare ``except:``, this lets
            # KeyboardInterrupt / SystemExit propagate.
            continue

    out_df[col] = df[col].apply(str)

    return out_df





#dtype='obj' -> 'a
def neo_val_to_pd_val(v):
    """Convert one Neo4j cell value into a pandas-friendly value.

    - ``None`` and values whose ``__module__`` cannot be read are returned
      unchanged.
    - Values from module ``neotime`` (neo4j 3) become ``str(v)``.
    - Values from module ``neo4j.time`` (neo4j 4) are mapped by class:
      ``DateTime`` -> ``v.to_native()``, ``Date`` -> native date combined
      with the module-level ``t0`` time, ``Time`` -> ``pd.to_timedelta`` of
      its ISO string, ``Duration`` -> its ISO string, anything else ->
      ``str(v)``.
    - Everything else (including ``neo4j.spatial`` values, which are handled
      later) is returned unchanged.
    """
    if v is None:
        return v

    try:
        origin = v.__module__
    except:  # noqa: E722 - no readable __module__: pass the value through
        return v

    #neo4j 3
    if origin == 'neotime':
        return str(v)

    if origin != 'neo4j.time':
        #handle neo4j.spatial.* later
        return v

    #neo4j 4
    kind = v.__class__
    if kind == neo4j.time.DateTime:
        return v.to_native()  #datetime.datetime
    if kind == neo4j.time.Date:
        return datetime.combine(v.to_native(), t0)  #datetime.datetime
    if kind == neo4j.time.Time:
        return pd.to_timedelta(v.iso_format())  #timedelta
    if kind == neo4j.time.Duration:
        #TODO expand out?
        return v.iso_format()  #str
    return str(v)


def stringify_spatial(v):
    """Return ``str(v)`` for ``neo4j.spatial.Point`` values, else ``v`` as-is.

    ``None`` is returned unchanged without consulting ``neo4j``.
    """
    ##TODO rep as JSON / dict?
    is_point = v is not None and isinstance(v, neo4j.spatial.Point)
    return str(v) if is_point else v


def get_mod(v):
    """Return ``v.__module__``, or ``None`` when it cannot be read."""
    try:
        module_name = v.__module__
    except:  # noqa: E722 - any failure to read the attribute means "unknown"
        module_name = None
    return module_name


## if a col has spatials:
## - all: flatten into new primitive cols
## - some: stringify
def flatten_spatial(df : pd.DataFrame, col : str) -> pd.DataFrame:
    """Normalize a column that may hold ``neo4j.spatial`` values.

    - No value in ``col`` reports module ``neo4j.spatial``: ``df`` is
      returned unchanged (same object).
    - Every non-null value has the same class: the column is expanded into
      primitive columns via ``flatten_spatial_col``.
    - Otherwise (mixed classes): a shallow copy is returned in which ``col``
      has been passed through ``stringify_spatial``.

    :param df: frame containing ``col``
    :param col: name of the column to inspect
    :returns: ``df`` itself or a shallow copy with ``col`` normalized
    """
    any_spatial = (df[col].apply(get_mod) == 'neo4j.spatial').any()
    if not any_spatial:
        return df

    with_vals = df[col].dropna()
    if len(with_vals) == 0:
        return df

    # Positional access is required here: after dropna() the index label 0 is
    # gone whenever the first row is null, so ``with_vals[0]`` (a label
    # lookup) would raise KeyError.
    first_val = with_vals.iloc[0]
    try:
        same_class = (
            with_vals.apply(lambda s: s.__class__) == first_val.__class__
        ).all()
    except Exception:
        # Could not compare classes; fall back to stringifying.
        same_class = False

    if same_class:
        return flatten_spatial_col(df, col)

    out_df = df.copy(deep=False)
    out_df[col] = df[col].apply(stringify_spatial)
    return out_df


def neo_df_to_pd_df(df):
    """Return a shallow copy of ``df`` with Neo4j values made pandas-friendly.

    Only columns whose dtype name is ``'object'`` are touched: each value is
    run through ``neo_val_to_pd_val`` and the column is then handed to
    ``flatten_spatial``. Other columns are carried over untouched.
    """
    result = df.copy(deep=False)
    for name in df:
        if df[name].dtype.name != 'object':
            continue
        converted = df[name].apply(neo_val_to_pd_val)
        result[name] = converted
        result = flatten_spatial(result, name)
    return result


class BoltSupportModuleNotFound(Exception):
def __init__(self):
Expand Down
Loading