This R Markdown Notebook was written by Alf Molinas for his ISP in April 2020 as part of the INSEAD MBA program.
library(rvest)
library(stringr)
library(httr)
# Extract the page links from a CSV file chosen interactively; the file must
# provide a "Link" column holding one page URL per row.
url <- read.csv(file.choose(), stringsAsFactors = FALSE)
linklist <- url$Link
if (is.null(linklist)) {
  stop("The selected CSV file has no 'Link' column.", call. = FALSE)
}

# Drop blank or missing links up front. The scraping below does not run inside
# a function, so return() cannot be used to skip an entry.
links <- as.character(linklist)
links <- links[!is.na(links) & links != ""]

# Collect the src attribute (image URL) of every <img> element on one page.
#   link: URL of the page to scrape.
# Returns a character vector of src values, or the marker string "NA" when the
# page cannot be retrieved (request failure or an HTTP 4xx/5xx status).
fetch_image_sources <- function(link) {
  # One request per page: the response is checked and then parsed, instead of
  # downloading the page a second time for parsing.
  response <- tryCatch(GET(link), error = function(e) NULL)
  if (is.null(response) || http_error(response)) {
    message("Could not retrieve ", link)
    return("NA")
  }
  print(link)
  read_html(response) %>%
    html_nodes(xpath = "//*/img") %>%
    html_attr("src")
}

image_sources <- lapply(links, fetch_image_sources)

# Save each page's image URLs in its own column, one URL per row. Columns are
# padded with NA up to the longest result so no URL is truncated, and every
# link gets a column (failed pages included) so columns stay aligned with links.
n_rows <- max(0L, lengths(image_sources))
padded <- lapply(image_sources, function(src) {
  length(src) <- n_rows
  src
})
names(padded) <- links

# check.names = FALSE keeps the page URL as the column header unchanged.
df <- as.data.frame(padded, stringsAsFactors = FALSE, check.names = FALSE)

write.csv(df, file = "pulledimages.csv")
Upload the pulled image links from the CSV file to an online table such as Google Sheets or Airtable, and the links will be converted to images.
LS0tDQp0aXRsZTogIklTUCBESUdJTUlORCBQSUNUVVJFIFBVTEwiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIGlzIFIgTWFya2Rvd24gTm90ZWJvb2sgaXMgd3JpdHRlbiBieSBBbGYgTW9saW5hcyBmb3IgaGlzIElTUCBpbiBBcHJpbCAyMDIwIGFzIHBhcnQgb2YgdGhlIElOU0VBRCBNQkEgcHJvZ3JhbS4NCg0KDQoNCmBgYHtyfQ0KbGlicmFyeShydmVzdCkgIA0KbGlicmFyeShzdHJpbmdyKQ0KbGlicmFyeShodHRyKQ0KDQoNCg0KDQojZXh0cmFjdCBsaW5rcyBmcm9tIGZpbGUNCnVybCA8LSByZWFkLmNzdihmaWxlLmNob29zZSgpKQ0KbGlua2xpc3QgPC0gdXJsJExpbmsNCiNjcmVhdGUgZGF0YSBmcmFtZSBmb3IgdGhlIGV4dHJhY3RlZCBpbWFnZSB1cmxzDQpkZiA8LSBkYXRhLmZyYW1lKDE6MTAwKQ0KDQpmb3IgKGl0ZW0gaW4gbGlua2xpc3QpIHsNCiAgaWYgKGl0ZW0gPT0gIiIpew0KICAgIHJldHVybigpDQogIH0NCiAgaWYgKGh0dHBfc3RhdHVzKEdFVChpdGVtKSkkY2F0ZWdvcnkgPT0gIkNsaWVudCBlcnJvciIpew0KICAgIGltZ3NyYyA8LSAiTkEiDQogIH0NCiAgZWxzZSB7DQogIHByaW50KGl0ZW0pDQoNCiAgDQogIA0KI2NvbGxlY3QgdGhlIHNyYyAoaW1hZ2UgVVJMcykgb2YgYWxsIHRoZSBpbWFnZXMgb24gdGhlIHdlYnNpdGUNCmltZ3NyYyA8LSByZWFkX2h0bWwoaXRlbQ0KKSAlPiUNCiAgaHRtbF9ub2Rlcyh4cGF0aCA9ICcvLyovaW1nJykgJT4lDQogIGh0bWxfYXR0cignc3JjJykNCmltZ3NyYw0KDQoNCiNzYXZlIG9uIHRvIDEgY29sdW1uIG9mIGEgdGFibGUgd2hlcmUgZWFjaCBVUkwgaXMgaW4gYSBkaWZmZXJlbnQgcm93DQpjb2x1bW4gPC0gMQ0KYWRkLmNvbDwtZnVuY3Rpb24oZGYsIG5ldy5jb2wpIHtuLnJvdzwtZGltKGRmKVsxXQ0KICAgICAgICAgICBsZW5ndGgobmV3LmNvbCk8LW4ucm93DQogICAgICAgICAgIGNiaW5kKGRmLCBuZXcuY29sKQ0KfQ0KZGY8LWFkZC5jb2woZGYsaW1nc3JjKQ0KbGVuZ3RoKGltZ3NyYykNCiAgfQ0KfQ0KDQp3cml0ZS5jc3YoZGYsIGZpbGUgPSAicHVsbGVkaW1hZ2VzLmNzdiIpDQoNCmBgYA0KDQoNCg0KVXBsb2FkIHRoZSBwdWxsZWQgaW1hZ2VzIGZyb20gdGhlIGNzdiBmaWxlIHRvIGFuIG9ubGluZSB0YWJsZSBsaWtlIEdvb2dsZSBzcHJlYWRzaGVldCBvciBBaXJ0YWJsZSwgYW5kIHRoZSBsaW5rcyB3aWxsIGNvbnZlcnQgdG8gdGhlIGltYWdlcy4NCg==