Commit 5881b692 by Sanjay Pant

Merge branch 'patch-1' into 'master'

Updating script to simplify the process of grabbing all relevant datetimes…

See merge request datasciencedojo/tutorials!1
parents a38cb6fa b537a440
......@@ -32,27 +32,20 @@ urls
# Grab all datetimes on the page: select nodes of marketwatch_bitcoin_articles
# by CSS selector and extract their text.
# NOTE(review): two html_nodes() calls with near-identical selectors are chained
# below; this looks like the removed and the added line of the diff shown
# together -- confirm that only "div.deemphasized span" is meant to remain.
datetime <- marketwatch_bitcoin_articles %>%
html_nodes("div.deemphasized span.invisible") %>% #See HTML source code for data within this tag
html_nodes("div.deemphasized span") %>% #See HTML source code for data within this tag
html_text()
# Print the extracted values for inspection
datetime
# Only the first few datetimes are included in this div.
# Grab datetimes for all URLs on the landing page
# and add these onto the end of the datetime vector.
datetime2 <- marketwatch_bitcoin_articles %>%
html_nodes("div.deemphasized span") %>%
html_text()
# Print the extracted values for inspection
datetime2
# Check the index where datetimes for other
# URLs start and loop through from that
# index to the end of all entries
for(i in datetime2[13:length(datetime2)]){
datetime <- c(datetime, i)
# Filter datetimes that do not follow a consistent format: every entry whose
# text matches "Today" is dropped; the remaining entries keep their order.
# grep() is vectorised, so the whole vector is filtered in a single call
# instead of growing the result one element at a time inside a loop. An empty
# input now yields an empty character vector rather than NULL.
# datetime2 is still assigned so any later code that reads it keeps working.
datetime2 <- grep("Today", datetime, invert = TRUE, value = TRUE)
datetime <- datetime2
# Print the filtered values for inspection
datetime
# Convert datetime text to a standard time format
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment