ID-list request & JSON parsing
This commit is contained in:
parent
a34f3d310f
commit
576e426b03
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
||||
/target
|
||||
testdata.json
|
||||
|
942
Cargo.lock
generated
942
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -8,3 +8,6 @@ edition = "2021"
|
||||
[dependencies]
|
||||
clap = "3.1.18"
|
||||
regex = "1.5.6"
|
||||
reqwest = { version = "0.11", features = ["json"] }
|
||||
tokio = { version = "1.18.2", features = ["full"] }
|
||||
serde_json = "1.0"
|
||||
|
87
src/main.rs
87
src/main.rs
@ -1,21 +1,25 @@
|
||||
use clap::{Arg, ArgGroup, Command};
|
||||
use regex::Regex;
|
||||
use serde_json::Value;
|
||||
use std::{path::PathBuf, process::exit};
|
||||
|
||||
// General error type to make error handling easier
|
||||
type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum Mode {
|
||||
Thread,
|
||||
Board,
|
||||
}
|
||||
|
||||
fn parse_cli_args() -> (PathBuf, String, Mode) {
|
||||
let matches = Command::new("My Super Program")
|
||||
.version("1.0")
|
||||
.author("Kevin K. <kbknapp@gmail.com>")
|
||||
.about("Does awesome things")
|
||||
fn parse_cli_args() -> Result<(PathBuf, String, Mode)> {
|
||||
let matches = Command::new("WGDL-imagescraper written in Rust")
|
||||
.version("0.1.0")
|
||||
.author("Arttu Einistö <einisto@proton.me>")
|
||||
.about("Scrapes images efficiently from 4chan.org")
|
||||
.arg(
|
||||
Arg::new("path")
|
||||
.short('p')
|
||||
Arg::new("output")
|
||||
.short('o')
|
||||
.long("path")
|
||||
.value_name("PATH")
|
||||
.help("Set an output directory")
|
||||
@ -46,11 +50,10 @@ fn parse_cli_args() -> (PathBuf, String, Mode) {
|
||||
.get_matches();
|
||||
|
||||
let re = Regex::new(
|
||||
r"(http|https)://boards.(4chan|4channel).org/[a-zA-Z]{1,4}/(catalog|thread/\d+)",
|
||||
)
|
||||
.unwrap();
|
||||
r"^((http|https)://)?boards.(4chan|4channel).org/[a-zA-Z]{1,4}/(catalog|thread/\d+)$",
|
||||
)?;
|
||||
|
||||
let path = PathBuf::from(matches.value_of("path").unwrap());
|
||||
let path = PathBuf::from(matches.value_of("output").unwrap());
|
||||
let target_match = matches.value_of("target").unwrap();
|
||||
let target = match re.is_match(target_match) {
|
||||
true => target_match,
|
||||
@ -64,15 +67,67 @@ fn parse_cli_args() -> (PathBuf, String, Mode) {
|
||||
false => Mode::Board,
|
||||
};
|
||||
|
||||
(path, String::from(target), mode)
|
||||
Ok((path, String::from(target), mode))
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// TODO: add var for default output path (similar to wgdl.py)
|
||||
let (path, target, mode) = parse_cli_args();
|
||||
/// Builds the JSON API URL for a single thread.
///
/// The last `/`-separated segment of `target` is used as the thread id and
/// the segment two places before it as the board name, i.e. `target` is
/// expected to end in `<board>/thread/<id>`.
///
/// # Panics
///
/// Panics if `target` has fewer than three `/`-separated segments.
fn create_thread_url(target: String) -> String {
    // Walk the segments from the end: `<id>`, then `thread`, then `<board>`.
    let mut segments = target.rsplit('/');
    let thread_id = segments
        .next()
        .expect("splitting a string always yields at least one segment");
    // `nth(1)` skips the `thread` segment and yields the board.
    let board = segments
        .nth(1)
        .expect("thread URL must end in `<board>/thread/<id>`");

    format!("https://a.4cdn.org/{}/thread/{}.json", board, thread_id)
}
|
||||
|
||||
/// Builds the JSON API URL for a board's catalog.
///
/// The second-to-last `/`-separated segment of `target` is used as the board
/// name, i.e. `target` is expected to end in `<board>/catalog`.
///
/// # Panics
///
/// Panics if `target` has fewer than two `/`-separated segments.
fn create_board_url(target: String) -> String {
    // Counting from the end: segment 0 is `catalog`, segment 1 is the board.
    // (Indexing at `len()` would always be out of bounds.)
    let board = target
        .rsplit('/')
        .nth(1)
        .expect("board URL must end in `<board>/catalog`");

    format!("https://a.4cdn.org/{}/catalog.json", board)
}
|
||||
|
||||
async fn get_imagelist(json_url: &str) -> Result<Vec<(String, String)>> {
|
||||
let req_body = reqwest::get(json_url).await?.text().await?;
|
||||
let json_data: Value = serde_json::from_str(req_body.as_str())?;
|
||||
|
||||
let mut thread_img_data: Vec<(String, String)> = Vec::new();
|
||||
for post in json_data["posts"].as_array().unwrap() {
|
||||
if post["tim"].is_i64() {
|
||||
thread_img_data.push((
|
||||
post["tim"].to_string(),
|
||||
post["ext"].as_str().unwrap().to_string(),
|
||||
));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(thread_img_data)
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
// TODO: add possible config-file for default output path (similar to wgdl.py)
|
||||
let (path, target, mode) = parse_cli_args()?;
|
||||
|
||||
println!(
|
||||
"CONFIG:\n\tPATH: {:?}\n\tTARGET: {}\n\tMODE: {:?}",
|
||||
"\nCONFIG:\n\tPATH: {:?}\n\tTARGET: {}\n\tMODE: {:?}\n",
|
||||
path, target, mode
|
||||
);
|
||||
|
||||
match mode {
|
||||
Mode::Thread => {
|
||||
let json_url = create_thread_url(target);
|
||||
let id_list = get_imagelist(&json_url.as_str()).await?;
|
||||
println!("{:#?}", id_list);
|
||||
// 3.) download img based on json
|
||||
}
|
||||
Mode::Board => {
|
||||
let json_url = create_board_url(target);
|
||||
let id_list = get_imagelist(&json_url.as_str()).await?;
|
||||
println!("{:#?}", id_list);
|
||||
// 3.) download img based on json
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user