# -*- coding: utf-8 -*-

"""
This ugly duckling was created by Beardswitcher 20160702 during Gubbdata2016
Free to use and modify however you like. AS LONG AS YOU APPEND TO THE CHANGELOG AND BUMP UP THE VERSION !

Please note:
Some parts are ugly but it works. 

Changelog:
Version Date     Name            Comment
0.1     20160701 BeardSwitcher   First working version, tested on LUBUNTU 12.N
0.2     20160702 BeardSwitcher   Code cleanup
0.3     20160702 BeardSwitcher   Released
0.4     20160702 BeardSwitcher   translateHtml() added
0.5     20160702 BeardSwitcher   Released

Known bugs:
latest_releases() is not as beautiful as it could be, but it works.
Depending on your printer (?), the chars translated by the translateHTML() function might come out garbled on paper.

"""
# Version string shown in the start-up banner; the changelog above asks for it
# to be bumped whenever the script is modified.
SCRIPTVERSION = '0.5'


import subprocess
import os
from time import sleep
from datetime import datetime
import sys

# Texts printed in the start-up banner.
SCRIPTNAME = 'CSDB.DK news stream'
SCRIPTTEXT = 'Collects data from CSDB.DK, prints new data when it happens. '
SCRIPTCREATEDBY = 'Created by Beardswitcher 20160702 during Gubbdata 2016 www.gubbdata.se. Free to use, share and modify, have fun.'
SCRIPTWORKSON = 'Tested OK on: Lubuntu 12.N with star LC-10 matrix printer'

# Usage text, also printed in the start-up banner.
HELPTEXT = '(sudo) python csdbnews.py ARGUMENT\nARGUMENT = FILEDUMP -> csdb.dk will be dumped to file \nARGUMENT = filename -> Read the file instead of reading csdb.dk (good for debugging)'
# Message prefixes used when reporting errors on the console.
PRINTEREXCEPTION = 'Exception when sending text to printer'  # used by lets_print_string()
EXCEPTION = 'Exception occured'  # NOTE(review): not referenced in the visible source
SUBPROCESSEXCEPTION = 'Exception occured when running subprocess'  # used by subprocess_curl()

# ISO 8859-1 Character Entities
# Maps each 'proper' character to the HTML entity that translateHTML() replaces
# with it. Every key must be unique: a repeated key silently overwrites the
# earlier entry in a dict literal.
DICTIONARY = {
'À' : "&Agrave;", 'Á' : "&Aacute;", 'Â' : "&Acirc;", 'Ã' : "&Atilde;", 'Ä' : "&Auml;", 'Å' : "&Aring;",
'Æ' : "&AElig;", 'Ç' : "&Ccedil;",
'È' : "&Egrave;", 'É' : "&Eacute;", 'Ê' : "&Ecirc;", 'Ë' : "&Euml;",
'Ì' : "&Igrave;", 'Í' : "&Iacute;", 'Î' : "&Icirc;", 'Ï' : "&Iuml;",
'Ð' : "&ETH;", 'Ñ' : "&Ntilde;",
'Ò' : "&Ograve;", 'Ó' : "&Oacute;", 'Ô' : "&Ocirc;", 'Õ' : "&Otilde;", 'Ö' : "&Ouml;", 'Ø' : "&Oslash;",
'Ù' : "&Ugrave;", 'Ú' : "&Uacute;", 'Û' : "&Ucirc;", 'Ü' : "&Uuml;",
'Ý' : "&Yacute;",
'Þ' : "&THORN;", 'ß' : "&szlig;",
'à' : "&agrave;", 'á' : "&aacute;", 'â' : "&acirc;", 'ã' : "&atilde;", 'ä' : "&auml;", 'å' : "&aring;",
'æ' : "&aelig;", 'ç' : "&ccedil;",
'è' : "&egrave;", 'é' : "&eacute;", 'ê' : "&ecirc;", 'ë' : "&euml;",
'ì' : "&igrave;", 'í' : "&iacute;", 'î' : "&icirc;", 'ï' : "&iuml;",
'ð' : "&eth;", 'ñ' : "&ntilde;",
'ò' : "&ograve;", 'ó' : "&oacute;", 'ô' : "&ocirc;", 'õ' : "&otilde;", 'ö' : "&ouml;", 'ø' : "&oslash;",
'ù' : "&ugrave;", 'ú' : "&uacute;", 'û' : "&ucirc;", 'ü' : "&uuml;",
'ý' : "&yacute;", 'þ' : "&thorn;", 'ÿ' : "&yuml;",
# The two ASCII entities previously shared the key '' so only one of them
# survived; they now decode to their real characters. '&' is kept last so
# that, on interpreters preserving insertion order, it is decoded last.
'"' : "&quot;", '&' : "&amp;"
}

def translateHTML(text):
    """
    Replace the HTML entities listed in DICTIONARY with their mapped characters.

    text: string that may contain entities such as '&auml;'
    Returns a new string; the input is not modified.
    """
    new_text = text

    for char, entity in DICTIONARY.items():
        # Test against the ORIGINAL text: an entity that only appears as a
        # by-product of an earlier replacement must not be decoded a second time.
        if entity in text:
            new_text = new_text.replace(entity, char)
    return new_text

def lets_print_string(text, device='/dev/usb/lp0'):
    """
    Send text to the printer, one line at a time.

    text:   string to print; may contain several '\\n'-separated lines
    device: path of the file to write to. Defaults to the USB line printer;
            pass e.g. '/dev/null' to run without a printer (good for debugging)

    Printing is best effort: any problem with the device is reported on the
    console and otherwise ignored.
    """
    try:
        with open(device, 'w') as lpt:
            # every line, including the last one, gets its own newline
            lpt.writelines(line + '\n' for line in text.split('\n'))

    except Exception as e:
        print('%s %s' % (PRINTEREXCEPTION, e))

        
def get_timestamp():
    """
    Return the current UTC time as a compact YYYYMMDD_HHMMSS string
    """
    stamp_format = '%Y%m%d_%H%M%S'
    now = datetime.utcnow()
    return now.strftime(stamp_format)


def get_year():
    """
    Return the current (UTC) year as a four digit string, YYYY
    """
    now = datetime.utcnow()
    return now.strftime('%Y')


def save_data_to_file(data):
    """
    Save the data to a new file named saved_data_<timestamp>.html in the
    current working directory.

    data: the page content to store
    Problems while opening or writing the file are reported on the console;
    nothing is raised.
    """
    time_stamp = get_timestamp()

    filename = 'saved_data_%s.html' % time_stamp
    file_with_path = os.path.abspath(filename)

    # On Python 3 subprocess output is bytes, and str() of bytes yields the
    # "b'...'" repr rather than the page text.
    # NOTE(review): UTF-8 is an assumption about the page encoding - confirm.
    if isinstance(data, bytes) and not isinstance(data, str):
        data = data.decode('utf-8', 'replace')

    try:
        # 'with' closes the file even when the write itself fails
        with open(file_with_path, 'a') as dump_file:  # open filename to append to it
            dump_file.write('%s' % str(data))
    except Exception as e:
        print('There was a problem opening the file: %s' % file_with_path)
        print(e)


def read_from_file(file_with_path):
    """
    Open file, read it and return content as string.

    file_with_path: path of the file to read
    Returns the file content. If the file cannot be opened or read, the
    problem is reported on the console and whatever was read up to that point
    (possibly the empty string) is returned.
    """
    file_content_list = []
    try:
        # 'with' closes the file even when reading fails half way
        with open(file_with_path, 'r') as source_file:  # open filename to read it
            for line in source_file:
                file_content_list.append(line)
    except Exception as e:
        print('There was a problem opening the file: %s' % file_with_path)
        print(e)

    # compress the list to a string and return it
    return "".join(file_content_list)


def subprocess_curl(url):
    """
    run curl as subprocess, return the stdout

    url: address handed to curl
    Returns what curl wrote to stdout as a text string, or the empty string
    when curl could not be run or exited with an error (the problem is
    reported on the console).
    """
    print('curl %s' % url)
    try:
        stdout = subprocess.check_output(["curl", url])
    except Exception as e:
        print('%s %s' % (SUBPROCESSEXCEPTION, e))
        # without an explicit result the function used to die with an
        # UnboundLocalError on the return below
        return ''

    # On Python 3 check_output() returns bytes, but the parsers split on str.
    # On Python 2 the result already is a str and is passed through untouched.
    # NOTE(review): UTF-8 is an assumption about the page encoding - confirm.
    if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8', 'replace')
    return stdout


def csdboneliner(data):
    """
    Fish the oneliners out of the csdb.dk front page.

    data: HTML of the page as a string
    Returns a list with one cleaned-up string per oneliner, in page order.
    Raises IndexError when the expected markers are not in the page.
    """
    # rough cut: what sits between 'Add Oneliner' and the donate link
    after_header = data.split('Add Oneliner')[1]
    before_donate = after_header.split('donate.php')[0]

    # fine cut: the content of the black <font> element
    inside_font = before_donate.split('color=black>')[1]
    oneliner_html = inside_font.split('</font>')[0]

    # every oneliner starts with an <i> tag
    oneliners = []
    for chunk in oneliner_html.strip().split('<i>'):
        cleaned = chunk.replace('</i>', ' ')
        for unwanted in ('<br>', '\n', '\r'):
            cleaned = cleaned.replace(unwanted, '')

        # the text in front of the first <i> is empty, skip such leftovers
        if cleaned:
            oneliners.append(cleaned)

    return oneliners


def latest_releases(data, year=None):
    """
    Collect the release rows found between 'Latest Additions' and
    'Latest Forum Posts' in the csdb.dk front page.

    data: HTML of the page as a string
    year: year (YYYY) used to recognise the date cell; defaults to the current
          year. Handy when parsing a page dump saved in another year.

    Returns a list of dicts with the keys 'date', 'group', 'type', 'scener'
    and 'name'; fields missing from a row are empty strings.
    Raises IndexError when the 'Latest Additions' marker is not in the page.
    """
    #TODO The filters here are not correct, but they work... Possible to trim better.
    if year is None:
        year = get_year()
    yyyy = str(year) + '-'

    keeper = data.split('Latest Additions')[1] # skip the top part
    keeper = keeper.split('Latest Forum Posts')[0]

    # (text looked for in the upper-cased cell, field it fills); first match wins
    link_markers = (
        ('SCENER', 'scener'),
        ('RELEASE/?ID', 'name'),
        ('RELEASETYPE_ID', 'type'),
        ('GROUP/?ID=', 'group'),
    )

    data_list = []
    for row in keeper.split('<tr'):
        info = {'date': '', 'group': '', 'type': '', 'scener': '', 'name': ''}

        # One flag per ROW. It used to be reset for every cell, so a row only
        # survived when its very last cell happened to match.
        found = False
        for cell in row.split('<td'):
            cell_upper = cell.upper()
            for marker, field in link_markers:
                if marker in cell_upper:
                    # the value is the link text: ...>VALUE</a>
                    info[field] = cell.split('</a>')[0].split('>')[-1]
                    found = True
                    break
            else:
                # no link marker matched: a cell mentioning 'YYYY-' holds the date
                if yyyy in cell:
                    info['date'] = cell.split('</font>')[0].split('>')[-1]
                    found = True

        if found:
            data_list.append({'date': info['date'],
                              'group': info['group'].strip(),
                              'type': info['type'].strip(),
                              'scener': info['scener'].strip(),
                              'name': info['name'].strip()})

    return data_list


def latest_additions(data):
    """
    Collect the rows of the 'Latest Additions' box of the csdb.dk front page.

    data: HTML of the page as a string
    Returns a list of dicts with the keys 'group', 'scener', 'event' and
    'name'; only rows that carry a release name are included.
    Raises IndexError when the '<b>Latest Additions' marker is not in the page.
    """
    below_header = data.split('<b>Latest Additions')[1] # skip the top part
    section = below_header.split('latestforumposts.php')[0]

    # (text looked for in the upper-cased cell, field it fills); first match wins
    markers = (
        ('SCENER', 'scener'),
        ('RELEASE/?ID', 'name'),
        ('RELEASETYPE_ID', 'type'),
        ('GROUP/?ID=', 'group'),
        ('EVENT/?ID=', 'event'),
    )

    additions = []
    for row in section.split('<tr'):
        fields = {'scener': '', 'name': '', 'type': '', 'group': '', 'event': ''}
        matched_any = False

        for cell in row.split('<td'):
            cell_upper = cell.upper()
            for marker, field in markers:
                if marker in cell_upper:
                    # the value is the link text: ...>VALUE</a>
                    fields[field] = cell.split('</a>')[0].split('>')[-1]
                    matched_any = True
                    break

        # rows without a release name are not reported
        if matched_any and fields['name'].strip():
            additions.append({'group': fields['group'].strip(),
                              'scener': fields['scener'].strip(),
                              'event': fields['event'].strip(),
                              'name': fields['name'].strip()})

    return additions


def _announce(line):
    """
    Decode the HTML entities in line, then show it on the console and send it
    to the printer.
    """
    line = translateHTML(line)
    print(line)
    lets_print_string(line)


def main(testfile = False):
    """
    Poll csdb.dk and print every oneliner, release and addition that was not
    present in the previous poll.

    testfile: False    -> poll the site forever, every few seconds
              FILEDUMP -> fetch the page once, save it to a file and return
              filename -> parse that file once instead of the site and return
    """
    url = 'http://csdb.dk'
    retry_time = 5  # seconds between two polls

    if testfile and 'FILEDUMP' in testfile: # yes i know. i should not use a parameter for more than one thing... it is ugly. but hey... it works.
        data = subprocess_curl(url)
        save_data_to_file(data)
        print('%s have been saved to file. (you can use it for test if you like to run offline)' % url)
        return

    if testfile:
        data = read_from_file(testfile)

    list_with_oneliners = []
    list_with_releases = []
    list_with_additions = []

    while True:
        if not testfile:
            data = subprocess_curl(url)
            if not data:
                # Nothing to parse. Keep the lists from the last good poll so
                # that old items are not printed again after a hiccup.
                print('No data received from %s. Will try again in %d seconds. CTRL+C to end\n' % (url, retry_time))
                sleep(retry_time)
                continue

        dinosaurs_in_closet = False  # set as soon as something new was printed
        previous_oneliner_list = list_with_oneliners
        previous_release_list = list_with_releases
        previous_additions_list = list_with_additions

        list_with_oneliners = csdboneliner(data)
        # now, check the list if there are something new.
        # but we want it in the reverse order to make the print nice.
        print('Oneliners: %d %dB' % (len(list_with_oneliners), len("".join(list_with_oneliners)) ))
        for each in list_with_oneliners[::-1]:
            if each not in previous_oneliner_list:
                _announce('Oneliner: %s' % each)
                dinosaurs_in_closet = True

        list_with_releases = latest_releases(data)
        print('Releases: %d %dB' % (len(list_with_releases), len(str(list_with_releases)) ))
        for each in list_with_releases[::-1]:
            # rows without a date are not reported as releases
            if each['date'] and each not in previous_release_list:
                # These lines used to translate the stale 'nice_string' instead
                # of the line being printed, so entities were never decoded here.
                _announce('Release: %s %s' % (each['name'], each['type']))
                _announce('   ^    By: %s %s %s' % (each['group'], each['scener'], each['date']))
                dinosaurs_in_closet = True

        list_with_additions = latest_additions(data)
        print('Additions: %d %dB' % (len(list_with_additions), len(str(list_with_additions)) ))
        for each in list_with_additions[::-1]:
            if each not in previous_additions_list:
                _announce('Addition: %s @ %s' % ( each['name'], each['event']))
                _announce('   ^    By: %s %s' % (each['group'], each['scener']))
                dinosaurs_in_closet = True

        # if it was run with a testfile we end here.
        if testfile:
            break

        if not dinosaurs_in_closet:
            print('Nothing New... Will check again in %d seconds. CTRL+C to end\n' % retry_time)
        sleep(retry_time)

# Script entry point: show the banner, pick up the optional command line
# argument and start polling. Guarded so that importing this module (e.g. to
# reuse or test the parsers) does not print, sleep or start the poll loop.
if __name__ == '__main__':
    print('\n\n\n\n\n\n\n')
    print('------------------------------------------------------')
    print(SCRIPTNAME)
    print(SCRIPTTEXT)
    print(SCRIPTCREATEDBY)
    print(SCRIPTWORKSON)
    print('Version: ' + SCRIPTVERSION)
    print('\n')
    print(HELPTEXT)
    print('------------------------------------------------------')
    print('\n')
    sleep(7)  # leave the banner on screen for a moment
    testfile = False

    # when several arguments are given, the last one wins
    for each_arg in sys.argv[1:]:
        print('Extra arguments on command line: %s' % each_arg)
        testfile = each_arg

    print('------------------------------------------------------')

    try:
        main(testfile)
    except KeyboardInterrupt:
        # CTRL+C is the advertised way to stop; it is not an Exception
        # subclass, so without this branch it ends in a traceback.
        print('\nEnd: stopped by user')
    except Exception as e:
        print('End: %s' % e)