33import org .apache .spark .api .java .JavaRDD ;
44import org .apache .spark .api .java .JavaSparkContext ;
55import org .apache .spark .sql .Dataset ;
6- import org .apache .spark .sql .Encoder ;
7- import org .apache .spark .sql .Encoders ;
86import org .apache .spark .sql .Row ;
97import org .apache .spark .sql .RowFactory ;
108import org .apache .spark .sql .SQLContext ;
1513import org .apache .spark .rdd .RDD ;
1614import org .graphframes .GraphFrame ;
1715import java .util .stream .IntStream ;
18- import java .io .IOException ;
19- import java .nio .file .Files ;
20- import java .nio .file .Path ;
21- import java .nio .file .Paths ;
2216import java .util .ArrayList ;
23- import java .util .Arrays ;
2417import java .util .List ;
2518import utils .Utils ;
2619
@@ -51,14 +44,7 @@ public static void wikipedia(JavaSparkContext ctx, SQLContext sqlCtx) {
5144
5245 Utils .line_separator ();
5346
54- // graphFrame.edges().show();
55- // graphFrame.vertices().show();
56-
57- // @todo: Benchmark with multiple dumping factor and numIterations
58- // --- Ideas ---
59- // 1. Define some dumping factors and numIterations and run & time pagerank algo. and select the best
6047 // For damping factor
61- // List<String> time = new ArrayList<String>(Arrays.asList(new String[120]));
6248 List <Row > timeList = new ArrayList <Row >();
6349 StructType outputSchema = new StructType (new StructField [] {
6450 new StructField ("dumping_factor" , DataTypes .DoubleType , false , new MetadataBuilder ().build ()),
@@ -84,7 +70,6 @@ public static void wikipedia(JavaSparkContext ctx, SQLContext sqlCtx) {
8470 Utils .print (log );
8571 topVertices .show (10 );
8672 Utils .line_separator ();
87- // time.add(log);
8873 });
8974 });
9075
@@ -93,24 +78,5 @@ public static void wikipedia(JavaSparkContext ctx, SQLContext sqlCtx) {
9378 Long count = output .count ();
9479 output .show (count .intValue ());
9580 Utils .line_separator ();
96-
97- // try {
98- // Files.write(Paths.get("/root/SDM-Lab-2/src/main/java/exercise_4/output.txt"), time);
99- // } catch (IOException e) {
100- // Utils.print("Unable to save file" + e);
101- // }
102-
103- // GraphFrame gf = graphFrame.pageRank().tol(0.01).resetProbability(0.15).run();
104- // GraphFrame gf = graphFrame.pageRank().resetProbability(0.15).maxIter(10).run();
105-
106- // Utils.line_separator();
107-
108- // gf.edges().show();
109- // gf.vertices().show();
110-
111- // Utils.line_separator();
112- // Dataset<Row> topVertices = gf.vertices().sort(org.apache.spark.sql.functions.desc("pagerank"));
113- // topVertices.show(10);
114-
11581 }
11682}
0 commit comments