summaryrefslogtreecommitdiff
path: root/src/main.rs
blob: 50b5e0492a614f77c9716171318ce042d0904a74 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
use select::document::Document;
use select::predicate::{Name, Class, Predicate};
use select::node::{Node, Attrs};

use reqwest;

/// A single story scraped from the Hacker News front page.
#[derive(Debug)]
struct Post {
    /// Numeric value parsed from the story row's `id` attribute.
    id: i32,
    /// Text content of the story link.
    title: String,
    /// Value of the story link's `href` attribute.
    source: String,
    /// Leading integer parsed from the score element's text.
    score: i32,
    /// Text content of the submitting user's element.
    author: String,
}

/// Renders the post as a multi-line, brace-delimited block with one
/// two-space-indented `field: value` line per field and no trailing newline.
///
/// Implementing `Display` (rather than an inherent `to_string`) keeps
/// `post.to_string()` working through the blanket `ToString` impl while also
/// enabling `{}` formatting, and avoids allocating a temporary `String` per
/// field.
impl std::fmt::Display for Post {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "Post {{")?;
        writeln!(f, "  id: {}", self.id)?;
        writeln!(f, "  title: {}", self.title)?;
        writeln!(f, "  source: {}", self.source)?;
        writeln!(f, "  score: {}", self.score)?;
        writeln!(f, "  author: {}", self.author)?;
        // Last line deliberately has no newline, matching the joined output.
        write!(f, "}}")
    }
}

/// Returns an owned copy of the value of the first attribute whose key equals
/// `name`, or `None` when no attribute in `attrs` has that key.
///
/// Keys are compared as string slices, so the only allocation is the returned
/// value itself.
fn get_attribute(mut attrs: Attrs, name: &str) -> Option<String> {
    attrs
        .find(|(key, _)| *key == name)
        .map(|(_, value)| String::from(value))
}

/// Performs a blocking HTTP GET on `url` and returns the response body as text.
///
/// Yields `None` if either the request or reading the body fails; the
/// underlying error is discarded. No HTTP status check is performed here, so
/// any response whose body can be read is returned.
fn get_page(url: &str) -> Option<String> {
    let response = reqwest::blocking::get(url).ok()?;
    response.text().ok()
}

/// Extracts the story entries from a parsed Hacker News listing page.
///
/// The listing is looked up as the first `tbody` beneath an element with
/// class `itemlist`. Its `tr` children (ignoring `spacer` rows and stopping at
/// the first `morespace` row) are examined as overlapping pairs: the first row
/// of a pair must hold the story link and the second its subtext. Pairs that
/// lack any required piece are skipped silently.
///
/// Returns an empty vector when the listing table cannot be found, rather
/// than panicking on unexpected markup.
fn parse_hackernews(document: Document) -> Vec<Post> {
    let table = document
        .find(Class("itemlist").descendant(Name("tbody")))
        .next();
    let table = match table {
        Some(table) => table,
        // Unexpected page layout: report "no posts" instead of crashing.
        None => return Vec::new(),
    };

    // Keep only real table rows that appear before the "morespace" marker row.
    let rows: Vec<Node> = table
        .children()
        .take_while(|row| !row.is(Class("morespace")))
        .filter(|row| !row.is(Class("spacer")) && row.is(Name("tr")))
        .collect();

    // Overlapping windows are intentional: a window that starts on a subtext
    // row has no story link and is rejected by `parse_post`.
    rows.windows(2)
        .filter_map(|pair| parse_post(&pair[0], &pair[1]))
        .collect()
}

/// Builds a `Post` from a story row and the row that follows it, or returns
/// `None` if any required element, attribute, or number is missing/unparsable.
fn parse_post(story_row: &Node, subtext_row: &Node) -> Option<Post> {
    let link = story_row.find(Class("storylink")).next()?;
    let id = get_attribute(story_row.attrs(), "id")?
        .parse::<i32>()
        .ok()?;
    let title = link.text();
    let source = get_attribute(link.attrs(), "href")?;

    let subtext = subtext_row.find(Class("subtext")).next()?;

    // The score is the leading space-separated token of the score element's text.
    let score = subtext
        .find(Class("score"))
        .next()?
        .text()
        .split(' ')
        .next()?
        .parse::<i32>()
        .ok()?;
    let author = subtext.find(Class("hnuser")).next()?.text();

    Some(Post {
        id,
        title,
        source,
        score,
        author,
    })
}

fn main() {
    let page = get_page("https://news.ycombinator.com/"); 
    let page = match page {
        Some(ref text) => text.as_str(),
        None => panic!("Returned page is empty)")
    };

    let doc = Document::from(page);
    let posts = parse_hackernews(doc);

    for post in posts {
        println!("{}", post.to_string());
    }
}