some small changes to the evaluation scripts

cleaned up the code
added benchmark results to README
2022-09-16 17:14:45 +02:00 · 2022-09-16 17:14:28 +02:00 · 2022-09-16 17:14:03 +02:00 · 2022-09-16 09:26:17 +02:00
6 changed files with 123 additions and 43 deletions
--- a/README.md
+++ b/README.md
@ -117,6 +117,55 @@ big set of parameters.

 `utils/plot_results.py` generates several plots of the results.

+# Results
+
+These are some quick tests; further results will be presented later.
+Everything was run on a Thinkpad X260 laptop with an Intel i7-6600U CPU @ 2.60GHz
+processor.
+Each test used the same 1000 queries.
+Rust v1.57.0 was used for all tests.
+
+The ALT variants were used with the 4 best landmarks.
+Further tests on the performance of more landmarks will be presented later.
+The set of 44 handpicked landmarks was spread around the extremities of the
+continents and into "dead ends" like the Mediterranean and the Gulf of Mexico
+with the goal to provide landmarks that are "behind" the source or target
+node.
+
+All benchmarks were run on the provided benchmark graph.
+
+## raw data:
+```
+# name, (avg. heap pops per query, avg. time)
+{'astar': (155019.451, 0.044386497025),
+ 'dijkstra': (423046.796, 0.058129875474999995),
+ 'greedy_32': (42514.751, 0.013299024275000002),
+ 'greedy_64': (35820.461, 0.011887869759),
+ 'handpicked_44': (70868.721, 0.01821366828),
+ 'random_32': (58830.082, 0.016845884717),
+ 'random_64': (51952.261, 0.015234422699)}
+```
+
+## Interpretation
+
+Dijkstra needs ~58ms per route, while the best version, greedy\_64 (that is,
+with 64 landmarks), needs only ~12ms, which is ~5 times faster.
+We also see that the greedy versions perform slightly better than their
+random counterparts with the same number of nodes.
+While the 44 handpicked landmarks outperformed A\* and Dijkstra, they are beaten
+by both the random and greedy landmark selections which had fewer nodes.
+
+## Memory Consumption
+
+The landmarks are basically arrays of the cost to each node.
+Since the distances are currently calculated with 64 bit integers
+each landmark needs 8 bytes per node in the graph.
+With a graph that has about 700k nodes this leads to ~5.5MB of memory per
+landmark.
+So 64 landmarks need ~350MB of memory.
+
+One could also use 32 bit integers which would halve the memory requirements.
+
 # References

 [1](Computing the Shortest Path: A\* meets Graph Theory, A. Goldberg and C. Harrelson, Microsoft Research, Technical Report MSR-TR-2004-24, 2004)
--- a/src/alt.rs
+++ b/src/alt.rs
@ -1,19 +1,25 @@
 use crate::gridgraph::{EdgeCost, GraphNode, GridGraph, NodeId};
 use serde::{Deserialize, Serialize};
-use std::cmp::Ordering;
 use std::collections::BinaryHeap;
+use crate::utils::DijkstraElement;

+/// a single Landmark
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Landmark {
    pub node: GraphNode,
    pub distances: Vec<EdgeCost>,
 }

+
+/// A set of Landmarks
 #[derive(Debug, Clone, Default, Serialize, Deserialize)]
 pub struct LandmarkSet {
    pub landmarks: Vec<Landmark>,
 }

+
+/// The LandmarkBestSet is the datastructure in which the indices of the
+/// best landmarks for a certain query are stored.
 #[derive(Debug, Clone)]
 pub struct LandmarkBestSet<'a> {
    pub landmark_set: &'a LandmarkSet,
@ -22,33 +28,20 @@ pub struct LandmarkBestSet<'a> {
 }

 impl Landmark {
+
+    /// generates a landmark (calculates all distances) for a given node.
    pub fn generate(node: GraphNode, graph: &GridGraph) -> Landmark {
+
+        // This is running a simplified version of dijkstra.
+        // It also does not track the ancestors of a node, because it is
+        // not needed for generating the landmarks.
+        
        let mut landmark = Landmark {
            node,
            distances: vec![EdgeCost::MAX; graph.nodes.len()],
        };
        landmark.node = node;

-        #[derive(Eq, PartialEq)]
-        struct DijkstraElement {
-            index: u32,
-            cost: EdgeCost,
-        }
-
-        impl Ord for DijkstraElement {
-            // inverted cmp function, such that the Max-Heap provided by Rust
-            // can be used as a Min-Heap
-            fn cmp(&self, other: &Self) -> Ordering {
-                other.cost.cmp(&self.cost)
-            }
-        }
-
-        impl PartialOrd for DijkstraElement {
-            fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-                Some(self.cmp(other))
-            }
-        }
-
        let mut heap = BinaryHeap::new();
        heap.push(DijkstraElement {
            cost: 0,
@ -86,7 +79,7 @@ impl Landmark {
    /// calculates the lower-bounding distance estimate between the 2 nodes
    /// via the landmark.
    /// If one or more of the nodes are not reachable from the landmark
-    /// an estimate of `0` is returned.
+    /// an estimate of `EdgeCost::MAX` is returned.
    pub fn estimate(&self, from: NodeId, to: NodeId) -> EdgeCost {
        let l_to = self.distances[to];
        let l_from = self.distances[from];
@ -100,12 +93,7 @@ impl Landmark {
            // which except for the sign are the same value.
            // We can simply take the bigger one, which is handled
            // nicely the abs() function
-            let distance = (l_to as i64 - l_from as i64).abs() as EdgeCost;
-            //println!(
-            //    "distance from {} to {} via landmark {} is at least {}",
-            //    from, to, self.node.index, distance
-            //);
-            distance
+            (l_to as i64 - l_from as i64).abs() as EdgeCost
        }
    }
 }
--- a/src/astar.rs
+++ b/src/astar.rs
@ -4,6 +4,8 @@ use crate::utils::EARTH_RADIUS;
 use std::cmp::Ordering;
 use std::collections::BinaryHeap;

+
+/// datastructure to hold data required by the A* algorithm
 pub struct AStar<'a> {
    pub graph: &'a GridGraph,
 }
@ -23,8 +25,6 @@ struct HeapElement {
 }

 impl Ord for HeapElement {
-    // inverted cmp function, such that the Max-Heap provided by Rust
-    // can be used as a Min-Heap
    fn cmp(&self, other: &Self) -> Ordering {
        other.cost.cmp(&self.cost)
    }
@ -36,21 +36,26 @@ impl PartialOrd for HeapElement {
    }
 }

+/// A simple haversine distance heuristic.
 pub fn estimate_haversine(node: &GraphNode, destination: &GraphNode) -> EdgeCost {
    // simple haversine distance
    (node.position.distance_to(&destination.position) * EARTH_RADIUS) as EdgeCost
-    // let lat_dist_a = (node.position.lat - destination.position.lat).abs();
-    // let lat_dist_b = (destination.position.lat - node.position.lat).abs();
-
-    // (lat_dist_a.min(lat_dist_b) * EARTH_RADIUS) as EdgeCost
 }

+/// a simple heuristic based on the difference in latitude between two points.
+/// The idea is that it is cheaper to calculate than the haversine distance.
 pub fn estimate_latitude(node: &GraphNode, destination: &GraphNode) -> EdgeCost {
    let lat_dist = (node.position.lat - destination.position.lat).abs();

    (lat_dist * EARTH_RADIUS) as EdgeCost
 }
+
 impl AStar<'_> {
+
+    /// calculates the shortest path from start to end given the `estimate`
+    /// heuristic function.
+    ///
+    /// Returns `None` if no path exists.
    pub fn shortest_path<F>(&self, start: &GraphNode, end: &GraphNode, estimate: F) -> Option<Route>
    where
        F: Fn(&GraphNode, &GraphNode) -> EdgeCost,
--- a/src/bin/task6-rocket.rs
+++ b/src/bin/task6-rocket.rs
@ -143,6 +143,8 @@ fn rocket() -> _ {

    let landmarks = load_landmarks(&args.landmarks);

+    println!("Listening on http://localhost:8000");
+
    rocket::build()
        .manage(GraphWrapper {
            graph: *graph,
--- a/src/utils.rs
+++ b/src/utils.rs
@ -1,18 +1,23 @@
 use crate::alt::LandmarkSet;
-use crate::gridgraph::GridGraph;
+use crate::gridgraph::{EdgeCost, GridGraph};
 use serde::{Deserialize, Serialize};
+use std::cmp::Ordering;
 use std::fs::File;
 use std::io::BufReader;
 use std::process::exit;

+/// an approximation of the Earth's radius.
 pub const EARTH_RADIUS: f64 = 6_371_000.0; // meters

+/// serialization format for routing queries.
 #[derive(Serialize, Deserialize, Debug, Copy, Clone)]
 pub struct RoutingQuery {
    pub source: usize,
    pub destination: usize,
 }

+/// loads the graph from the given path.
+/// exits if an error occurs during loading.
 pub fn load_graph(path: &str) -> Box<GridGraph> {
    println!("Loading file from {}", path);
    let file = match File::open(path) {
@ -36,6 +41,8 @@ pub fn load_graph(path: &str) -> Box<GridGraph> {
    graph
 }

+/// loads a set of landmarks from the given path.
+/// exits if an error occurs during loading.
 pub fn load_landmarks(path: &str) -> LandmarkSet {
    let landmarks = match File::open(path) {
        Ok(f) => f,
@ -47,3 +54,25 @@ pub fn load_landmarks(path: &str) -> LandmarkSet {

    bincode::deserialize_from(BufReader::new(landmarks)).unwrap()
 }
+
+/// A heap element for Dijkstra's algorithm.
+///
+/// The comparison functions are inverted, so that Rust's max-heap
+/// (`BinaryHeap`) works as a min-heap.
+#[derive(Eq, PartialEq)]
+pub struct DijkstraElement {
+    pub index: u32,
+    pub cost: EdgeCost,
+}
+
+impl Ord for DijkstraElement {
+    fn cmp(&self, other: &Self) -> Ordering {
+        other.cost.cmp(&self.cost)
+    }
+}
+
+impl PartialOrd for DijkstraElement {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
--- a/utils/plot_results.py
+++ b/utils/plot_results.py
@ -6,6 +6,7 @@ from csv import writer
 from typing import Tuple, List
 import re
 import numpy as np
+from pprint import pprint

 import matplotlib.pyplot as plt

@ -17,7 +18,7 @@ path = argv[1]

 files = [f for f in os.listdir(path) if os.path.isfile(f"{ path }/{f}")]

-files = [f for f in files if re.match(r"greedy_64_.+", f) is not None ]
+# files = [f for f in files if re.match(r"greedy_64_.+", f) is not None ]


 def parse_file(file: str) -> Tuple[float, List[int]]:
@ -53,7 +54,7 @@ with open("times.csv", "w+") as times_file:
            full_path = f"{ path }/{ file }"
            time, pop = parse_file(full_path)

-            total_pops = sum(pop)
+            total_pops = sum(pop)/len(pop)

            results[name] = (total_pops, time)

@ -61,19 +62,25 @@ with open("times.csv", "w+") as times_file:

            pops.writerow([name, *pop])
 rel_pops = list()
+abs_pops = list()
 rel_time = list()
+abs_time = list()
 labels = list()

-# base_pops = results["dijkstra"][0]
-# base_time = results["dijkstra"][1]
-base_pops = results["greedy_64_1"][0]
-base_time = results["greedy_64_1"][1]
+
+
+pprint(results)
+baseline = "dijkstra"
+base_pops = results[baseline][0]
+base_time = results[baseline][1]

 for name, values in results.items():
    pops, time = values
    labels.append(name)
    rel_pops.append(pops/base_pops)
    rel_time.append(time/base_time)
+    abs_pops.append(pops)
+    abs_time.append(time)



@ -81,8 +88,8 @@ x = np.arange(len(labels))  # the label locations
 width = 0.35  # the width of the bars

 fig, ax = plt.subplots()
-rects1 = ax.bar(x - width/2, rel_time , width, label='time')
-rects2 = ax.bar(x + width/2, rel_pops, width, label='pops')
+rects1 = ax.bar(x - width/2, abs_time , width, label='time')
+# rects2 = ax.bar(x + width/2, abs_pops, width, label='pops')

 ax.legend()
 ax.set_xticks(x, labels)
Author	SHA1	Message	Date
Johannes Erwerle	5b93233915	some small changes to the evaluation scripts	2022-09-16 17:14:45 +02:00
Johannes Erwerle	275c2d933e	cleaned up the code	2022-09-16 17:14:28 +02:00
Johannes Erwerle	ea0e22506d	added benchmark results to README	2022-09-16 17:14:03 +02:00
Johannes Erwerle	be8effeb18	webserver now prints the UI its running on	2022-09-16 09:26:17 +02:00