Commit 5881b692 by Sanjay Pant

Merge branch 'patch-1' into 'master'

Updating script to simplify the process of grabbing all relevant datetimes…

See merge request !1
parents a38cb6fa b537a440
...@@ -32,27 +32,20 @@ urls ...@@ -32,27 +32,20 @@ urls
# Grab all datetimes on the page # Grab all datetimes on the page
datetime <- marketwatch_bitcoin_articles %>% datetime <- marketwatch_bitcoin_articles %>%
html_nodes("div.deemphasized span.invisible") %>% #See HTML source code for data within this tag html_nodes("div.deemphasized span") %>% #See HTML source code for data within this tag
html_text() html_text()
datetime datetime
# Only first few datetimes are included in this div # Filter datetimes that do not follow a consistent format
# Grab datetimes for all URLs on the landing page datetime2 <- c()
# and add these onto to the end of datetime vector for(i in datetime){
datetime2 <- marketwatch_bitcoin_articles %>% correct_datetime <- grep("Today", i, invert=T, value=T)
html_nodes("div.deemphasized span") %>% datetime2 <- append(datetime2, correct_datetime)
html_text()
datetime2
# Check the index where datetimes for other
# URLs start and loop through from that
# index to the end of all entries
for(i in datetime2[13:length(datetime2)]){
datetime <- c(datetime, i)
} }
datetime <- datetime2
datetime datetime
# Convert datetime text to a standard time format # Convert datetime text to a standard time format
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment