@@ -15,14 +15,14 @@ func getEnv(key, fallback string) string {
1515 if value , exists := os .LookupEnv (key ); exists {
1616 return value
1717 }
18-
1918 return fallback
2019}
2120
2221func main () {
2322 mongoHost := getEnv ("MONGO_HOST" , "localhost" )
2423 mongoPassword := getEnv ("MONGO_PASSWORD" , "" )
2524 mongoUsername := getEnv ("MONGO_USERNAME" , "" )
25+ mongoDatabase := getEnv ("MONGO_DB" , "test" )
2626
2727 fmt .Println ("Page Rank Service!" )
2828
@@ -47,169 +47,125 @@ func main() {
4747 fmt .Println ("Successfully connected to MongoDB!" )
4848
4949 // Access the configured database (MONGO_DB, defaults to "test")
50- db := client .Database ("test" )
50+ db := client .Database (mongoDatabase )
5151
52- // Access the outlinks collection
53- coll := db .Collection ("outlinks" )
52+ // Access the outlinks and backlinks collections
53+ outlinksColl := db .Collection ("outlinks" )
54+ backlinksColl := db .Collection ("backlinks" )
5455
55- // Get the count of documents in the collection
56- count , err := coll .CountDocuments (context .TODO (), bson.D {})
56+ // Get the count of documents in the outlinks collection
57+ count , err := outlinksColl .CountDocuments (context .TODO (), bson.D {})
5758 if err != nil {
58- panic (fmt .Sprintf ("Could not count documents: %v" , err ))
59+ panic (fmt .Sprintf ("Could not count documents in outlinks : %v" , err ))
5960 }
6061
61- fmt .Printf ("Number of documents in the collection: %d\n " , count )
62-
63- // Iterate over the documents in the collection
64- cursor , err := coll .Find (context .TODO (), bson.D {})
62+ backlinks := make (map [string ][]string )
63+ cursorBacklinks , err := backlinksColl .Find (context .TODO (), bson.D {})
6564 if err != nil {
66- panic (fmt .Sprintf ("Could not find documents : %v" , err ))
65+ panic (fmt .Sprintf ("Could not fetch backlinks : %v" , err ))
6766 }
68-
69- defer cursor .Close (context .TODO ())
70-
71- // Page rank setup
72- // Create a hash map to hold the page_url and its corresponding page rank
73- pageRank := make (map [string ]float64 )
74-
75- for cursor .Next (context .TODO ()) {
76- var result bson.M
77- if err := cursor .Decode (& result ); err != nil {
78- panic (fmt .Sprintf ("Could not decode document: %v" , err ))
67+ defer cursorBacklinks .Close (context .TODO ())
68+ for cursorBacklinks .Next (context .TODO ()) {
69+ var doc struct {
70+ ID string `bson:"_id"`
71+ Links []string `bson:"links"`
7972 }
80-
81- // Get the _id field, this is the page_url
82- url , ok := result ["_id" ].(string )
83- if ! ok {
84- panic ("Could not convert _id to string" )
73+ if err := cursorBacklinks .Decode (& doc ); err != nil {
74+ panic (fmt .Sprintf ("Could not decode backlink document: %v" , err ))
8575 }
86-
87- // Assign a starting page rank value
88- pageRank [url ] = 1.0 / float64 (count )
76+ backlinks [doc .ID ] = doc .Links
8977 }
9078
91- if err := cursor .Err (); err != nil {
92- panic (fmt .Sprintf ("Cursor error: %v" , err ))
79+ outlinksCount := make (map [string ]int )
80+ cursorOutlinks , err := outlinksColl .Find (context .TODO (), bson.D {})
81+ if err != nil {
82+ panic (fmt .Sprintf ("Could not fetch outlinks: %v" , err ))
83+ }
84+ defer cursorOutlinks .Close (context .TODO ())
85+ for cursorOutlinks .Next (context .TODO ()) {
86+ var doc struct {
87+ ID string `bson:"_id"`
88+ Links []string `bson:"links"`
89+ }
90+ if err := cursorOutlinks .Decode (& doc ); err != nil {
91+ panic (fmt .Sprintf ("Could not decode outlink document: %v" , err ))
92+ }
93+ outlinksCount [doc .ID ] = len (doc .Links )
9394 }
9495
95- // Print the initial page rank values
96- fmt .Println ("Initial Page Rank values:" )
97- for url , rank := range pageRank {
98- fmt .Printf ("Page URL: %s, Page Rank: %f\n " , url , rank )
96+ pageRank := make (map [string ]float64 )
97+ for url := range outlinksCount {
98+ pageRank [url ] = 1.0 / float64 (count )
9999 }
100100
101- // Page rank algorithm
102- // Set the number of iterations
101+ fmt . Printf ( "Total number of URLs: %d \n " , count )
102+
103103 iterations := 10
104- for range iterations {
105- // Create a temporary hash map to hold the new page rank values
104+ damping := 0.85
105+ for i := 0 ; i < iterations ; i ++ {
106106 newPageRank := make (map [string ]float64 )
107107
108- // Calculate the new page rank values
109- for url , rank := range pageRank {
110- fmt .Printf ("Calculating new page rank for URL: %s | Previous Rank: %v\n " , url , rank )
111-
112- // Get the backlinks for the current URL
113- var backlinksDoc struct {
114- Links []string `bson:"links"`
115- }
116-
117- // Get the backlinks for the current URL
118- err := db .Collection ("backlinks" ).FindOne (context .TODO (), bson.D {{Key : "_id" , Value : url }}).Decode (& backlinksDoc )
119- if err != nil {
120- if err == mongo .ErrNoDocuments {
121- // No backlinks found for this URL
122- fmt .Printf ("No backlinks found for URL %s\n " , url )
123- } else {
124- panic (fmt .Sprintf ("Could not find backlinks for URL %s: %v" , url , err ))
125- }
126- continue
127- }
128-
129- // Get the count of backlinks
130- backlinksCount := len (backlinksDoc .Links )
131- fmt .Printf ("\t Found %d backlinks for URL: %s\n " , backlinksCount , url )
132-
133- newCumulativeRank := 0.0
134-
135- // Iterate over the backlinks and calculate the new page rank
136- for _ , backlink := range backlinksDoc .Links {
137- // Get the outlink document for the specified backlink
138- var outlinkDoc struct {
139- Links []string `bson:"links"`
140- }
141-
142- // Get the count of outlinks
143- err := db .Collection ("outlinks" ).FindOne (context .TODO (), bson.D {{Key : "_id" , Value : backlink }}).Decode (& outlinkDoc )
144- if err != nil {
145- if err == mongo .ErrNoDocuments {
146- // No outlinks found for this URL
147- fmt .Printf ("No outlinks found for URL %s\n " , backlink )
148- } else {
149- panic (fmt .Sprintf ("Could not find outlinks for URL %s: %v" , backlink , err ))
108+ for url , _ := range pageRank {
109+ var newCumulativeRank float64
110+
111+ backlinksForUrl , exists := backlinks [url ]
112+ if exists {
113+ for _ , backlink := range backlinksForUrl {
114+ outlinkCount , ok := outlinksCount [backlink ]
115+ if ok {
116+ backlinkRank , ok := pageRank [backlink ]
117+ if ok {
118+ newCumulativeRank += backlinkRank / float64 (outlinkCount )
119+ }
150120 }
151- continue
152- }
153- outlinksCount := len (outlinkDoc .Links )
154- // fmt.Printf("\t\tFound %d outlinks for URL: %s\n", outlinksCount, backlink)
155-
156- // Get the previous page rank value for the backlink
157- backlinkRank , ok := pageRank [backlink ]
158- if ! ok {
159- // fmt.Printf("No page rank found for backlink %s\n", backlink)
160- continue
161121 }
162- // fmt.Printf("\t\t\tBacklink Page Rank: %f\n", backlinkRank)
163-
164- newCumulativeRank += backlinkRank / float64 (outlinksCount )
165122 }
166123
167- damping := 0.85
168124 newPageRank [url ] = (1 - damping )/ float64 (count ) + damping * newCumulativeRank
169- fmt .Println ()
170125 }
171126
172- // Update the page rank values
173127 pageRank = newPageRank
174-
175- // Print the new page rank values
176- fmt .Println ("New Page Rank values:" )
177- for url , rank := range pageRank {
178- fmt .Printf ("Page URL: %s, Page Rank: %f\n " , url , rank )
179- }
180- fmt .Println ("--------------------------------------------------" )
181128 }
182129
183- // Sort the page rank values by rank
184- // Create a slice to hold the page rank values
185- type PageRank struct {
130+ var sortedPageRanks []struct {
186131 URL string
187132 Rank float64
188133 }
189- var pageRanks []PageRank
190134 for url , rank := range pageRank {
191- pageRanks = append (pageRanks , PageRank {URL : url , Rank : rank })
135+ sortedPageRanks = append (sortedPageRanks , struct {
136+ URL string
137+ Rank float64
138+ }{url , rank })
192139 }
193- // Sort the page ranks by rank
194- sort .Slice (pageRanks , func (i , j int ) bool {
195- return pageRanks [i ].Rank > pageRanks [j ].Rank
140+ sort .Slice (sortedPageRanks , func (i , j int ) bool {
141+ return sortedPageRanks [i ].Rank > sortedPageRanks [j ].Rank
196142 })
197143
198- // Print the sorted page rank values
144+ // Print sorted page ranks
199145 fmt .Println ("Sorted Page Rank values:" )
200- for _ , pageRank := range pageRanks {
146+ for _ , pageRank := range sortedPageRanks {
201147 fmt .Printf ("Page URL: %s, Page Rank: %f\n " , pageRank .URL , pageRank .Rank )
202148 }
203149
204- // Save the page rank values to the database
205- for _ , pageRank := range pageRanks {
206- _ , err := db .Collection ("pagerank" ).InsertOne (context .TODO (), bson.D {
207- {Key : "_id" , Value : pageRank .URL },
208- {Key : "rank" , Value : pageRank .Rank },
209- })
150+ var bulkOps []mongo.WriteModel
151+ for _ , pageRank := range sortedPageRanks {
152+ bulkOps = append (bulkOps , mongo .NewUpdateOneModel ().
153+ SetFilter (bson.D {{Key : "_id" , Value : pageRank .URL }}).
154+ SetUpdate (bson.D {
155+ {Key : "$set" , Value : bson.D {
156+ {Key : "rank" , Value : pageRank .Rank },
157+ }},
158+ }).
159+ SetUpsert (true ))
160+ }
161+
162+ // Execute all queued upserts with one BulkWrite call
163+ if len (bulkOps ) > 0 {
164+ _ , err := db .Collection ("pagerank" ).BulkWrite (context .TODO (), bulkOps )
210165 if err != nil {
211- panic (fmt .Sprintf ("Could not insert page rank value : %v" , err ))
166+ panic (fmt .Sprintf ("Could not batch insert page rank values : %v" , err ))
212167 }
213168 }
169+
214170 fmt .Println ("Page rank values saved to the database!" )
215171}
0 commit comments