Skip to content

Commit 95126d7

Browse files
authored
Dev/neo4j 4 (#172)
* infra(docker test): tweak * feat(neo4j 4): handle neo4j.time and neo4j.spatial * fix(logging): remove stray printfs
1 parent 728944c commit 95126d7

File tree

7 files changed

+455
-61
lines changed

7 files changed

+455
-61
lines changed

DEVELOP.md

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,50 +16,45 @@ cd docker && ./test-cpu-local.sh
1616
```
1717

1818

19-
## Local (non-docker) - DEPRECATED
20-
### Install Git Checkout:
19+
## Native - DEPRECATED
20+
### Install Git Checkout - DEPRECATED
2121

2222
1. Remove any version installed with pip
2323
`pip uninstall graphistry`
2424
2. Install local git checkout
2525
`./setup.py develop`
2626

27-
### Running Tests Locally
27+
### Running Tests Locally - DEPRECATED
2828

2929
1. Install our test dependencies: `nose` and `mock`.
3030
2. Run `nosetests` in the root pygraphistry folder (or `nose` or `nose2`).
3131
3. `python setup.py test`
3232
4. To duplicate CI tests, in python2 and 3, run ` time flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics`
3333

3434

35-
### Via Docker (GPU)
36-
37-
1. "docker pull graphistry/graphistry-forge-base:v<latest>"
38-
2. ./test-docker.sh
39-
4035
## CI
4136

4237
We intend to move to GitHub Actions / DockerHub Automated Builds for CPU and TBD for GPU
4338

44-
### Travis
39+
### Travis - DEPRECATED
4540

4641
Travis CI automatically runs on every branch (with a Travis CI file). To configure, go to the [Travis CI account](https://travis-ci.org/graphistry/pygraphistry) .
4742

48-
### Uninstall Git Checkout
43+
### Native - Uninstall Git Checkout - DEPRECATED
4944

5045
Uninstall the local checkout (useful to rollback to packaged version) with `./setup.py develop --uninstall`
5146

52-
## Release Procedure: Merge, Tag, Package, & Upload
47+
## Publish: Merge, Tag, & Upload
5348

54-
0. Merge the desired PR to master and switch to master head (`git checkout master && git pull`)
49+
1. Merge the desired PR to master and switch to master head (`git checkout master && git pull`)
5550

56-
1. Tag the repository with a new version number. We use semantic version numbers of the form *X.Y.Z*.
51+
2. Tag the repository with a new version number. We use semantic version numbers of the form *X.Y.Z*.
5752

5853
```sh
5954
git tag X.Y.Z
6055
git push --tags
6156
```
6257

63-
2. Toggle version as active at [ReadTheDocs](https://readthedocs.org/projects/pygraphistry/versions/)
58+
3. Toggle version as active at [ReadTheDocs](https://readthedocs.org/projects/pygraphistry/versions/)
6459

65-
3. Confirm PyPI picked up the [release](https://pypi.org/project/graphistry/)
60+
4. Confirm PyPI picked up the [release](https://pypi.org/project/graphistry/)

docker/test-cpu-entrypoint.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
#!/bin/bash
22

3-
python -B -O -m pytest -v graphistry/tests $@
3+
python -B -O -m pytest -vv graphistry/tests $@

docker/test-cpu-local.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# Run tests using local mounts
44

5-
#docker-compose build
5+
docker-compose build
66

77
TEST_CPU_VERSION=${TEST_CPU_VERSION:-latest}
88

graphistry/bolt_util.py

Lines changed: 149 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
import logging, pandas as pd
2+
from datetime import datetime
13
from .pygraphistry import util
24

5+
logger = logging.getLogger(__name__)
6+
37
node_id_key = u'_bolt_node_id_key'
48
node_type_key = u'type'
59
node_label_prefix_key = u'_lbl_'
@@ -8,20 +12,14 @@
812
relationship_id_key = u'_bolt_relationship_id'
913
relationship_type_key = u'type'
1014

11-
def is_neotime(v):
12-
try:
13-
return v.__module__ == 'neotime'
14-
except:
15-
return False
16-
15+
t0 = datetime.min.time()
1716

18-
def stringify_neotimes(df):
19-
#Otherwise currently encountering a toString error
17+
try:
18+
import neo4j
2019
import neotime
21-
df2 = df.copy()
22-
for c in df.columns:
23-
df2[c] = df[c].apply(lambda v: str(v) if is_neotime(v) else v)
24-
return df2
20+
except:
21+
pass
22+
2523

2624
def to_bolt_driver(driver=None):
2725
if driver is None:
@@ -35,7 +33,6 @@ def to_bolt_driver(driver=None):
3533
raise BoltSupportModuleNotFound()
3634

3735
def bolt_graph_to_edges_dataframe(graph):
38-
import pandas as pd
3936
df = pd.DataFrame([
4037
util.merge_two_dicts(
4138
{ key: value for (key, value) in relationship.items() },
@@ -48,11 +45,10 @@ def bolt_graph_to_edges_dataframe(graph):
4845
)
4946
for relationship in graph.relationships
5047
])
51-
return stringify_neotimes(df)
48+
return neo_df_to_pd_df(df)
5249

5350

54-
def bolt_graph_to_nodes_dataframe(graph):
55-
import pandas as pd
51+
def bolt_graph_to_nodes_dataframe(graph) -> pd.DataFrame:
5652
df = pd.DataFrame([
5753
util.merge_two_dicts(
5854
{ key: value for (key, value) in node.items() },
@@ -64,7 +60,143 @@ def bolt_graph_to_nodes_dataframe(graph):
6460
{ node_label_prefix_key + str(label): True for label in node.labels }))
6561
for node in graph.nodes
6662
])
67-
return stringify_neotimes(df)
63+
return neo_df_to_pd_df(df)
64+
65+
66+
## Knowing a col is all-spatial, flatten into primitive cols
67+
def flatten_spatial_col(df : pd.DataFrame, col : str) -> pd.DataFrame:
    """
    Expand a column of spatial values into primitive per-field columns.

    For each of the duck-typed fields ``x``, ``y``, ``z``, ``srid``,
    ``longitude`` and ``latitude``, a column named ``{col}_{field}`` is added
    holding that attribute of every entry (``None`` where an entry lacks it).
    A field that no entry provides is not added at all. Finally ``col``
    itself is replaced by ``str()`` of each entry.

    :param df: input frame; it is not modified, a shallow copy is returned
    :param col: name of the column to flatten
    :returns: shallow copy of ``df`` with the extra field columns and the
        stringified ``col``
    """
    out_df = df.copy(deep=False)

    #TODO: Can we do better than duck typing the spatial fields?
    for field in ('x', 'y', 'z', 'srid', 'longitude', 'latitude'):
        # Look the attribute up per value instead of wrapping the whole column
        # in one try/except: a single missing entry (None or NaN) must not
        # discard the field for every other row.
        vals = df[col].apply(lambda v, field=field: getattr(v, field, None))
        if vals.notna().any():
            out_df[f'{col}_{field}'] = vals

    # NOTE: str() is applied to every entry, so missing entries become the
    # strings 'None' / 'nan'.
    out_df[col] = df[col].apply(str)

    return out_df
108+
109+
110+
111+
112+
113+
#dtype='obj' -> 'a
114+
def neo_val_to_pd_val(v):
    """
    Convert a single neo4j temporal value into a pandas-friendly value.

    Dispatch is on the value's ``__module__``:

    - ``neotime`` (neo4j 3 driver): returned as ``str(v)``
    - ``neo4j.time`` (neo4j 4 driver):
        - ``DateTime`` -> ``v.to_native()``
        - ``Date`` -> ``v.to_native()`` combined with the module-level
          midnight constant ``t0`` into a ``datetime.datetime``
        - ``Time`` -> ``pd.to_timedelta(v.iso_format())``
        - ``Duration`` -> ``v.iso_format()`` string
        - anything else -> ``str(v)``

    ``None``, values without a ``__module__`` attribute, and values from any
    other module (including ``neo4j.spatial``, handled elsewhere) are returned
    unchanged.

    :param v: arbitrary cell value
    :returns: the converted value, or ``v`` itself when no conversion applies
    """
    if v is None:
        return v

    # getattr with a default replaces a bare try/except: only a missing
    # attribute means "not a neo4j value", other errors are not hidden.
    v_mod = getattr(v, '__module__', None)

    #neo4j 3
    if v_mod == 'neotime':
        return str(v)

    #neo4j 4
    if v_mod == 'neo4j.time':
        if isinstance(v, neo4j.time.DateTime):
            return v.to_native() #datetime.datetime
        if isinstance(v, neo4j.time.Date):
            return datetime.combine(v.to_native(), t0) #datetime.datetime
        if isinstance(v, neo4j.time.Time):
            # NOTE(review): assumes iso_format() yields a string that
            # pd.to_timedelta can parse - confirm for times with a UTC offset
            return pd.to_timedelta(v.iso_format()) #timedelta
        if isinstance(v, neo4j.time.Duration):
            #TODO expand out?
            return v.iso_format() #str
        return str(v)

    #handle neo4j.spatial.* later

    return v
145+
146+
147+
def stringify_spatial(v):
    """
    Return ``str(v)`` for neo4j spatial points; pass everything else through.

    :param v: arbitrary cell value
    :returns: ``None`` for ``None``, ``str(v)`` when ``v`` is an instance of
        ``neo4j.spatial.Point``, otherwise ``v`` unchanged
    """
    if v is None:
        return None

    # Import locally: the module-level neo4j import is best-effort, so relying
    # on the global name would raise NameError when the driver is missing.
    try:
        from neo4j.spatial import Point
    except ImportError:
        # Without the driver no value can be a neo4j point
        return v

    if isinstance(v, Point):
        ##TODO rep as JSON / dict?
        return str(v)
    return v
154+
155+
156+
def get_mod(v):
    """
    Return the ``__module__`` of a value, or ``None`` if it has none.

    :param v: arbitrary value
    :returns: ``v.__module__`` when the attribute exists, else ``None``
    """
    # getattr with a default only absorbs a missing attribute, unlike a bare
    # except which would also swallow unrelated errors
    return getattr(v, '__module__', None)
161+
162+
163+
## if a col has spatials:
164+
## - all: flatten into new primitive cols
165+
## - some: stringify
166+
def flatten_spatial(df : pd.DataFrame, col : str) -> pd.DataFrame:
    """
    Normalize a column that may contain neo4j spatial values.

    - no entry whose module is ``neo4j.spatial``: ``df`` is returned as-is
    - every non-null entry has the same class: the column is expanded into
      primitive columns via ``flatten_spatial_col``
    - mixed classes: entries are converted with ``stringify_spatial``

    :param df: input frame; it is not modified
    :param col: name of the column to inspect
    :returns: ``df`` itself when there is nothing to do, otherwise a new frame
    """
    any_spatial = (df[col].apply(get_mod) == 'neo4j.spatial').any()
    if not any_spatial:
        return df

    with_vals = df[col].dropna()
    if len(with_vals) == 0:
        return df

    # Positional access is required here: after dropna() the index label 0 no
    # longer exists whenever the first row was null, so with_vals[0] would
    # raise KeyError.
    first_class = with_vals.iloc[0].__class__
    all_same_class = with_vals.apply(lambda s: s.__class__ is first_class).all()

    if all_same_class:
        return flatten_spatial_col(df, col)

    out_df = df.copy(deep=False)
    out_df[col] = df[col].apply(stringify_spatial)
    return out_df
190+
191+
192+
def neo_df_to_pd_df(df):
    """
    Convert neo4j-specific values in a dataframe to pandas-friendly ones.

    Every column whose dtype name is ``'object'`` has its values mapped
    through ``neo_val_to_pd_val`` and is then passed to ``flatten_spatial``.
    Other columns are left untouched. The input frame is not modified.

    :param df: dataframe built from bolt results
    :returns: converted frame, starting from a shallow copy of ``df``
    """
    result = df.copy(deep=False)
    object_cols = [name for name in df if df[name].dtype.name == 'object']
    for name in object_cols:
        result[name] = df[name].apply(neo_val_to_pd_val)
        result = flatten_spatial(result, name)
    return result
199+
68200

69201
class BoltSupportModuleNotFound(Exception):
70202
def __init__(self):

0 commit comments

Comments
 (0)