
Multiple Web Scrapers
Github
Languages & Tools: Java, Selenium
I've created multiple web page scrapers for different websites. Each scraper is customised for the formatting of its target website. The downloaded data is then saved in an Excel spreadsheet.
A Selenium-automated browser is used.
List of Amazon products with link, description, images and other information. The list is over 2 million rows and 14 columns long. - Scraped from https://www.amazon.com/








I've also scraped YouTube and Instagram for videos, descriptions, hashtags and other info.
Sample Code
/**
 * Walks through {@code readFile.textLine} from a fixed start index, loads the
 * dictionary page for each word, waits {@code delay} milliseconds and then
 * calls {@code getInfo} for that word.
 *
 * <p>A failure while loading, waiting or extracting is retried for the same
 * word, but only a bounded number of times; after that the word is skipped so
 * a permanently failing page cannot stall the whole run.
 *
 * @throws InterruptedException if the thread is interrupted during the delay;
 *         the interrupt flag is restored before rethrowing
 * @throws Exception kept in the signature for compatibility with existing callers
 */
void getPage() throws Exception {
    // NOTE(review): presumably a resume point from an earlier run — confirm before reuse.
    final int startIndex = 8013;
    // Upper bound on consecutive failures for one word before moving on.
    final int maxAttempts = 3;

    int wordNo = startIndex;
    int attempts = 0;
    while (wordNo < readFile.textLine.length) { // PAGE
        word = readFile.textLine[wordNo];
        try {
            driver.get("https://en.oxforddictionaries.com/definition/" + word);
            Thread.sleep(delay);
            getInfo(wordNo);
            wordNo++;
            attempts = 0;
        } catch (InterruptedException e) {
            // Do not treat a stop request as a retryable page failure.
            Thread.currentThread().interrupt();
            throw e;
        } catch (Exception e) {
            attempts++;
            if (attempts >= maxAttempts) {
                System.err.println("Giving up on \"" + word + "\" after " + attempts + " attempts: " + e);
                wordNo++;
                attempts = 0;
            } else {
                System.out.println("Trying again");
            }
        }
    }
}
void getInfo(int t){
try {
String definition = driver.findElement(By.xpath(".//*[@id='content']/div[1]/div[2]/div/div/div/div[1]/section[1]/ul/li[1]/div/p/span[2]")).getText().replace("n"," ").replace("""," ");
String example = getExample();
System.out.println((t+1) + "/" + readFile.textLine.length);
System.out.println("Word : " + word);
System.out.println("Definition : " + definition);
System.out.println("Example : " + example);
System.out.println();
createFile();
writer.append("n"" + word + "",");
writer.append(""" + definition + "",");
writer.append(""" + example + """);
saveFile();
} catch (Exception e) {
}
}
String getExample(){
String example = "How can I use " + word + ", in a sentence? Oh look I just used " + word + " in a sentence. Im amazing.";
for(int i = 0; i < 15; i++) {
try {
driver.findElement(By.xpath(".//*[@id='content']/div[1]/div[2]/div/div/div/div[1]/section[1]/ul/li[1]/div/div[" + i + "]/div[1]/button")).click();
example = driver.findElement(By.xpath(".//*[@id='content']/div[1]/div[2]/div/div/div/div[1]/section[1]/ul/li[1]/div/div[" + i + "]/div[2]/ul/li[1]/em")).getText().replace("n", " ").replace(""", " ").replace("‘","").replace("’","");
i = 15;
} catch (Exception e) {
}
}
return example;
}