#coding=utf-8
"""Module for screen scraping a search on segundamano.es.

Author: Hermann Kaser
URL: http://volt.io/segunda-mano/
Version: 0.1
"""

import re
import BeautifulSoup
import time
import locale
import urllib

# Force the Spanish locale for the LC_TIME category so that the date strings
# generateRSS builds with time.strftime use Spanish day and month names.
# NOTE(review): this runs at import time and changes the locale for the whole
# process; presumably it fails if 'es_ES.UTF-8' is not installed on the host
# -- confirm on the deployment machine.
locale.setlocale(locale.LC_TIME, 'es_ES.UTF-8')

# Three-letter Spanish month abbreviations in calendar order; generateRSS
# uses the list position + 1 as the month number.
meses = ['ene', 'feb', 'mar', 'abr', 'may', 'jun', 'jul', 'ago', 'sep', 'oct', 'nov', 'dic']

def _format_item_date(day_text, hour_text):
    """Turn a listing's day and hour cells into a long date string.

    day_text: 'Hoy' (today), 'Ayer' (yesterday), or a day number followed
        by a three-letter month abbreviation listed in `meses`.
    hour_text: time of day as 'HH:MM'.

    Returns a unicode string formatted with '%A %d de %B, %Y'; day and month
    names follow the LC_TIME locale set at module import.
    Raises ValueError when the month abbreviation is not in `meses` or a
    numeric field cannot be parsed.
    """
    now = time.localtime()
    year, month, day = now[0], now[1], now[2]

    if day_text == 'Hoy':
        pass
    elif day_text == 'Ayer':
        # May yield day 0 on the first of the month; this relies on mktime
        # normalizing out-of-range fields into the previous month.
        day -= 1
    else:
        day = int(day_text[0:2].strip())
        curr_month = month
        month = meses.index(day_text[-3:]) + 1

        # The listing carries no year: if the item's month is later than the
        # current month, it is probably from last year.
        if curr_month < month:
            year -= 1

    hour, minute = hour_text.split(':')

    # Round-trip through mktime/localtime so the weekday and day-of-year
    # fields are filled in; strftime needs them to print the weekday name.
    # isdst is -1 so the C library decides whether DST applies. Forcing it
    # to 1 shifts timestamps by an hour during standard time, which moves
    # items posted just after midnight onto the previous day.
    timestamp = time.mktime(
        (year, month, day, int(hour), int(minute), 0, 0, 0, -1))
    time_tuple = time.localtime(timestamp)
    return unicode(time.strftime('%A %d de %B, %Y', time_tuple), 'utf-8')


def generateRSS(where, what):
    """generateRSS(where, what) -> {}

    Screen-scrape a segundamano.es search and return it as a feed dict.

    where: segundamano's internal locality code, sent as the 'ca' parameter.
    what: the search text, sent as the 'q' parameter.

    Returns a dict with the keys 'title', 'link', 'description' and 'items'.
    Each item is a dict with 'title', 'link', 'description', 'price',
    'category', 'image' (the image URL, or False when the row has none) and
    'date'. 'items' is empty when the page has no element with id 'hl'.
    """
    url = 'http://www.segundamano.es/li?%s' % (urllib.urlencode({'ca': where, 'q': what}))

    # Close the connection explicitly; the response object is not relied on
    # as a context manager here.
    response = urllib.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()
    soup = BeautifulSoup.BeautifulSoup(html)

    feed = {
        'title': 'Feed de segunda mano buscando "%s"' % what,
        'link': url.replace('&', '&amp;'),
        'description': 'Feed de segunda mano buscando "%s"' % what,
        'items': []
    }

    table = soup.find(id='hl')
    if table is None:
        # No results table on the page: return an empty feed rather than
        # failing with AttributeError on the lookup below.
        return feed

    # Links to an item's detail page; compiled once for all rows.
    item_link = re.compile('/vi/[0-9]+')

    for t in table.findAll(name='tr', recursive=False):
        # The first cell holds the day and the hour as its first and third
        # child nodes.
        day_text = t.td.contents[0].strip()
        hour_text = t.td.contents[2].strip()
        date = _format_item_date(day_text, hour_text)

        # get link
        link = t.find(href=item_link)['href']

        # get image; a cell containing only '&nbsp;' means there is none
        image_tag = t.find(attrs={'class': 'image'})
        if image_tag.contents[0].strip() == '&nbsp;':
            image = False
        else:
            image = image_tag.find(href=item_link).img['src']

        # get description: the cell two siblings after the image cell
        description_tag = image_tag.nextSibling.nextSibling

        description = description_tag.contents[1].contents[0].strip()
        # Drop the last six characters of the price text -- presumably a
        # trailing '&euro;' entity; verify against the live markup.
        price = description_tag.contents[4].strip()[:-6]

        description_simple = (description + ' - ' + price + ' euros')

        # get category: the cell two siblings after the description cell
        category = description_tag.nextSibling.nextSibling.contents[0]

        feed['items'].append(
            {
                'title': description_simple,
                'link': 'http://www.segundamano.es%s' % link,
                'description': description,
                'price': price,
                'category': category,
                'image': image,
                'date': date
            }
        )

    return feed
