Commit b537a440 by Rebecca Merrett

Updating script to simplify the process of grabbing all relevant datetimes…

Updating script to simplify the process of grabbing all relevant datetimes tagged differently during certain times of the day
parent a38cb6fa
......@@ -32,27 +32,20 @@ urls
# Grab all datetimes on the page
datetime <- marketwatch_bitcoin_articles %>%
html_nodes("div.deemphasized span.invisible") %>% #See HTML source code for data within this tag
html_nodes("div.deemphasized span") %>% #See HTML source code for data within this tag
html_text()
datetime
# Only first few datetimes are included in this div
# Grab datetimes for all URLs on the landing page
# and add these onto the end of the datetime vector
datetime2 <- marketwatch_bitcoin_articles %>%
html_nodes("div.deemphasized span") %>%
html_text()
datetime2
# Check the index where datetimes for other
# URLs start and loop through from that
# index to the end of all entries
for(i in datetime2[13:length(datetime2)]){
datetime <- c(datetime, i)
# Filter out datetimes containing "Today", which do not follow a consistent format
datetime2 <- c()
for(i in datetime){
correct_datetime <- grep("Today", i, invert=T, value=T)
datetime2 <- append(datetime2, correct_datetime)
}
datetime <- datetime2
datetime
# Convert datetime text to a standard time format
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment