diff --git a/modules/ROOT/pages/faq.adoc b/modules/ROOT/pages/faq.adoc
index 057a4fd..406545c 100644
--- a/modules/ROOT/pages/faq.adoc
+++ b/modules/ROOT/pages/faq.adoc
@@ -101,7 +101,7 @@ Refer to xref:overview.adoc#_spark_and_scala_compatibility[this page] to know wh
 
 This might happen when creating a new graph using the GDS library.
 The issue here is that the query is run the first time to extract the DataFrame schema and then is run again to get the data.
-To avoid this issue you can use the xref:quickstart.adoc#user-defined-schema[user defined schema] approach.
+To avoid this issue you can use the xref:read/define-schema.adoc#custom-schema[user defined schema] approach.
 
 == Databricks setup
 
diff --git a/modules/ROOT/pages/gds.adoc b/modules/ROOT/pages/gds.adoc
index 8bb183c..1cc6372 100644
--- a/modules/ROOT/pages/gds.adoc
+++ b/modules/ROOT/pages/gds.adoc
@@ -71,7 +71,7 @@ spark.read.format("org.neo4j.spark.DataSource")
 
 which will show a result like this:
 
-```bash
+```
 +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+-----------------+-------------+
 |nodeProjection                            |relationshipProjection                                                                                                                                                                    |graphName|nodeCount|relationshipCount|projectMillis|
 +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+-----------------+-------------+
@@ -103,7 +103,7 @@ spark.read.format("org.neo4j.spark.DataSource")
 ----
 (
     spark.read.format("org.neo4j.spark.DataSource")
-    .option("gds", "gds.pageRank.stream")
+    .option("gds", "gds.pageRank.stream.estimate")
     .option("gds.graphName", "myGraph")
     .option("gds.configuration.concurrency", "2")
     .load()
@@ -187,12 +187,11 @@ As you can see, we have now only the two columns `nodeId` and `score`, let's see
 # we'll assume that `spark` variable is already present
 # we create the `nodes_df`
 nodes_df = spark.read.format("org.neo4j.spark.DataSource") \
-    .option("url", "neo4j://localhost:7687") \
     .option("labels", "Page") \
     .load()
 
 # we join `nodes_df` with `pr_df` created in the step before
-new_df = nodes_df.join(pr_df, nodes_df.col("<id>").equalTo(pr_df.col("nodeId")))
+new_df = nodes_df.join(pr_df, nodes_df["<id>"] == pr_df["nodeId"])
 
 new_df.show(truncate=False)
 ----
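
For context, the final `gds.adoc` hunk swaps Scala Dataset API calls (`DataFrame.col(...)`, `Column.equalTo(...)`), which do not exist in PySpark, for native Python column expressions. Below is a minimal runnable sketch of that corrected join pattern; the local SparkSession and the hand-built stand-in DataFrames are assumptions for illustration, while the column names (`<id>`, `nodeId`) follow the patched example.

```
# A minimal sketch of the corrected PySpark join (assumed local setup;
# the stand-in rows replace DataFrames the Neo4j connector would produce).
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("join-sketch").getOrCreate()

# Stand-in for `nodes_df`: node reads expose the internal Neo4j id as `<id>`.
nodes_df = spark.createDataFrame([(0, "Home"), (1, "About")], ["<id>", "title"])

# Stand-in for `pr_df`: a PageRank stream result with `nodeId` and `score`.
pr_df = spark.createDataFrame([(0, 0.85), (1, 0.15)], ["nodeId", "score"])

# PySpark has no DataFrame.col()/Column.equalTo(): indexing with [] returns a
# Column, and `==` builds the equality expression the join condition needs.
new_df = nodes_df.join(pr_df, nodes_df["<id>"] == pr_df["nodeId"])

new_df.show(truncate=False)
```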