Idris Kadri
Front & Back End Developer & Animator

Amazon Scraper

GitHub

Languages & Tools: Java, Jsoup

Two programs: given an Amazon category or search URL, the first program saves the link of each Amazon product it finds to a text file. The second program parses the information from each product link and writes it to a CSV spreadsheet file.


Sample Code



	

/**
 * Reads the text of the element with id {@code priceblock_ourprice} and stores it in
 * {@code price} with the dollar sign removed.
 *
 * <p>Best-effort: when the document or the price element is missing, {@code price} is left
 * unchanged and no exception is raised.
 */
public void getPrice() {
    if (document == null) {
        return;
    }
    Element prices = document.getElementById("priceblock_ourprice");
    if (prices == null) {
        // No price element on this page; keep whatever value price already holds.
        return;
    }
    // Order matters: drop "$ " first, then replace each remaining space with '.',
    // and finally remove a "$" that was not followed by a space.
    price = prices.text().replace("$ ", "").replace(" ", ".").replace("$", "");
}

public void getReviews(){
try{
Element custReviews = document.getElementById("acrCustomerReviewLink");

String customer = custReviews.text();

customerReviews = customer;


}catch(Exception e){
}

try{
Elements reviews = document.getElementsByClass("a-row review-data");

String words5 = reviews.first().html();
String temp5 = words5.replace("
", "$$$");
Document doc5 = Jsoup.parse(temp5);
String desc5 = doc5.body().text().replace("$$$", "n").replace("Read more", "").toString();

review = desc5;

}catch(Exception e){

}

}

/**
 * Builds {@code postCategory} from the text of the element with id
 * {@code wayfinding-breadcrumbs_container}.
 *
 * <p>Each {@code " › "} separator in that text is replaced with {@code "|"}, and
 * {@code getBy} is appended as the final segment. When the document or the element is
 * missing, {@code postCategory} is set to {@code getBy} alone.
 */
public void getCategories() {
    Element navigation =
            document == null ? null : document.getElementById("wayfinding-breadcrumbs_container");
    if (navigation == null) {
        postCategory = getBy;
        return;
    }
    // Literal replacement: the separator contains no regex metacharacters, so replace()
    // yields the same result as the previous replaceAll() without compiling a pattern.
    String navEdit = navigation.text().replace(" › ", "|");
    postCategory = navEdit + "|" + getBy;
}

/**
 * Chooses the CSS selector stored in {@code imageString}.
 *
 * <p>{@code "div#leftCol img[src]"} is used when it matches at least one element in the
 * document; otherwise {@code "div#altImages img[src]"} is used.
 *
 * <p>{@code select} returns an empty result rather than throwing when nothing matches, so
 * the fallback is decided by an explicit emptiness check instead of a catch block.
 */
public void getImageString() {
    String primarySelector = "div#leftCol img[src]";
    if (document != null && !document.select(primarySelector).isEmpty()) {
        imageString = primarySelector;
    } else {
        imageString = "div#altImages img[src]";
    }
}

/**
 * Stores the {@code src} attribute selected by {@code imageString} in {@code featuredImage}.
 *
 * <p>When that value contains {@code "G/01/"} or does not start with {@code "http"}, a
 * message is printed and both {@code skip} and {@code noImg} are set to {@code true}.
 * Any exception raised along the way is reported on standard output and otherwise ignored.
 */
public void getProductImages() {
    try {
        final String src = document.select(imageString).attr("src");
        featuredImage = src;

        final boolean startsWithHttp = src.startsWith("http");
        // NOTE(review): "G/01/" presumably identifies a placeholder image path -- confirm.
        final boolean hasG01Path = src.contains("G/01/");

        if (!startsWithHttp || hasG01Path) {
            System.out.println("There is no featured image... Skipped....");
            noImg = true;
            skip = true;
        }
    } catch (Exception e) {
        System.out.println("getProduct Images failed");
    }
}

Back