This program was intended to email me search results from Craigslist. I wrote it before I realized you could get an RSS feed of a search.

#!/usr/bin/env ruby
require 'mechanize'
require 'net/smtp'
require 'date'
 
# Sample configuration -- edit these values before running.

# What to search: `city` is interpolated into the craigslist host name and
# each entry of `terms` is submitted to the site's search form in turn.
terms = %w[scsi raid]
city = 'atlanta'

# How far back to look, in days, counting today
# (e.g. a run on the 15th returns items posted on the 13th, 14th and 15th).
days_to_send = 3

# Mail delivery: the SMTP host to connect to, the address the message is
# delivered to, and whether each ad is collected as HTML (true) or as plain
# text (false).
email_server = 'localhost'
email_recipient = 'recipient@example.com'
email_html = true
 
# Scrape craigslist once per search term and collect every recent ad.
#
# An ad is kept when its date is strictly after cutoff_date, so with
# days_to_send = 3 a run on the 15th keeps the 13th, 14th and 15th.
today = Date.today
cutoff_date = today - days_to_send
items = []
url = "http://#{city}.craigslist.org"
# The WWW:: namespace was deprecated in Mechanize 1.0 and removed in 2.0.
agent = Mechanize.new

terms.each do |term|
  page = agent.get(url)
  # form_with replaces the old `page.forms.action(...).first` list API.
  form = page.form_with(action: '/search/')
  raise "No search form found at #{url}" unless form

  form.query = term
  page = agent.submit(form)

  # `search` returns a node set and ignores any block passed to it, so the
  # result has to be iterated explicitly with `each`.
  page.search('p').each do |para|
    # Listings are expected to contain a date such as "Jul 11" or "Jul  9";
    # paragraphs without one are not ads and are skipped.
    date_match = para.inner_text.match(/(\w{3})\s{1,2}(\d{1,2})/)
    next unless date_match

    # A three-letter word followed by digits is not necessarily a month.
    month = Date::ABBR_MONTHNAMES.index(date_match[1])
    next unless month

    day = date_match[2].to_i
    next unless Date.valid_date?(today.year, month, day)

    # The listing carries no year. Assume the current one, and roll back a
    # year when that lands in the future (e.g. a "Dec 30" ad seen on Jan 2).
    item_date = Date.new(today.year, month, day)
    item_date = item_date.prev_year if item_date > today
    next unless item_date > cutoff_date

    items << (email_html ? para.to_html : para.inner_text)
  end
end
 
# Build the complete mail message (headers, blank line, body).
#
# - The Content-Type follows email_html, so plain-text items are not
#   labelled as HTML.
# - The To: header uses the configured recipient instead of a fixed address.
# - The Date: header is written in RFC 2822 format.
# - The empty line after the headers is required: without it the body is
#   parsed as part of the header block.
# - items is joined explicitly; interpolating the array directly would
#   embed its inspect form (brackets, quotes, escapes) on Ruby 1.9+.
content_type = email_html ? 'text/html' : 'text/plain'
msg = <<END
From: Craig Scrape <donotreply@example.com>
To: Recipient <#{email_recipient}>
Subject: Craigslist Ads
Date: #{Time.now.strftime('%a, %d %b %Y %H:%M:%S %z')}
Content-Type: #{content_type}

#{items.join("\n")}
END
 
# Deliver the message through the configured SMTP server on port 25, using
# donotreply@example.com as the envelope sender and email_recipient as the
# envelope recipient. The connection is closed when the block returns.
Net::SMTP.start(email_server, 25) do |connection|
  connection.send_message(msg, 'donotreply@example.com', email_recipient)
end

This code is released under the GPL.

code/craigscrape.txt · Last modified: 2009/07/11 17:55 by brian
Recent changes · Show pagesource · Login