neo4j · nvitucci · Oct 3, 2024 · Nov 27, 2024
diff --git a/modules/ROOT/pages/faq.adoc b/modules/ROOT/pages/faq.adoc
@@ -101,7 +101,7 @@ Refer to xref:overview.adoc#_spark_and_scala_compatibility[this page] to know wh
 This might happen when creating a new graph using the GDS library.
 The issue here is that the query is run the first time to extract the DataFrame schema and then is run again to get the data.
 
-To avoid this issue you can use the xref:quickstart.adoc#user-defined-schema[user defined schema] approach.
+To avoid this issue, you can use the xref:read/define-schema.adoc#custom-schema[user-defined schema] approach.
 
 == Databricks setup
 

diff --git a/modules/ROOT/pages/gds.adoc b/modules/ROOT/pages/gds.adoc
@@ -71,7 +71,7 @@ spark.read.format("org.neo4j.spark.DataSource")
 
 which will show a result like this:
 
-```bash
+```
 +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+-----------------+-------------+
 |nodeProjection                            |relationshipProjection                                                                                                                                                                    |graphName|nodeCount|relationshipCount|projectMillis|
 +------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------+---------+-----------------+-------------+
@@ -103,7 +103,7 @@ spark.read.format("org.neo4j.spark.DataSource")
 ----
 (
   spark.read.format("org.neo4j.spark.DataSource")
-  .option("gds", "gds.pageRank.stream")
+  .option("gds", "gds.pageRank.stream.estimate")
   .option("gds.graphName", "myGraph")
   .option("gds.configuration.concurrency", "2")
   .load()
@@ -187,12 +187,11 @@ As you can see, we have now only the two columns `nodeId` and `score`, let's see
     # we'll assume that `spark` variable is already present
     # we create the `nodes_df`
     nodes_df = spark.read.format("org.neo4j.spark.DataSource") \
-      .option("url", "neo4j://localhost:7687") \
       .option("labels", "Page") \
       .load()
 
     # we join `nodes_df` with `pr_df` created in the step before
-    new_df = nodes_df.join(pr_df, nodes_df.col("<id>").equalTo(pr_df.col("nodeId")))
+    new_df = nodes_df.join(pr_df, nodes_df["<id>"] == pr_df["nodeId"])
     new_df.show(truncate=False)
 ----