Commit b537a440 by Rebecca Merrett

Updating script to simplify the process of grabbing all relevant datetimes…

Updating script to simplify the process of grabbing all relevant datetimes tagged differently during certain times of the day
parent a38cb6fa
...@@ -32,27 +32,20 @@ urls ...@@ -32,27 +32,20 @@ urls
# Grab all datetimes on the page # Grab all datetimes on the page
datetime <- marketwatch_bitcoin_articles %>% datetime <- marketwatch_bitcoin_articles %>%
html_nodes("div.deemphasized span.invisible") %>% #See HTML source code for data within this tag html_nodes("div.deemphasized span") %>% #See HTML source code for data within this tag
html_text() html_text()
datetime datetime
# Only first few datetimes are included in this div # Filter datetimes that do not follow a consistent format
# Grab datetimes for all URLs on the landing page datetime2 <- c()
# and add these onto the end of datetime vector for(i in datetime){
datetime2 <- marketwatch_bitcoin_articles %>% correct_datetime <- grep("Today", i, invert=T, value=T)
html_nodes("div.deemphasized span") %>% datetime2 <- append(datetime2, correct_datetime)
html_text()
datetime2
# Check the index where datetimes for other
# URLs start and loop through from that
# index to the end of all entries
for(i in datetime2[13:length(datetime2)]){
datetime <- c(datetime, i)
} }
datetime <- datetime2
datetime datetime
# Convert datetime text to a standard time format # Convert datetime text to a standard time format
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment