#!/usr/bin/python2

import os
import sys
import urllib2
import time
import HTMLParser
import json
import math
import tempfile
from wkt import parse_wkt

def getdata(url):
    #print url
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        request = urllib2.Request(url, headers=headers)
        f = urllib2.urlopen(request)
        data = f.read()
        f.close()
    except urllib2.URLError:
        secs = 60
        print " sleep(%s) to retry" % secs,
        time.sleep(secs)        
        request = urllib2.Request(url, headers=headers)
        f = urllib2.urlopen(request)
        data = f.read()
        f.close()
        
    #print data
    return data.decode("utf-8")

def writefile(pathname, data):
    """Write *data* (a unicode string) to *pathname*, encoded as UTF-8.

    Uses a context manager so the file is closed even if the write
    raises (the original leaked the handle on error).
    """
    with open(pathname, "wt") as f:
        f.write(data.encode("utf-8"))
    
def readfile(pathname):
    """Read *pathname* and return its whole content decoded from UTF-8.

    Uses a context manager so the file is closed even if the read
    raises (the original leaked the handle on error).
    """
    with open(pathname, "rt") as f:
        return f.read().decode("utf-8")

def getDevelProperties():
    props = {}
    home = os.environ.get('HOME')
    fname = os.path.join(home,".gvsig-devel.properties")
    try:
        with open(fname, 'r') as f:
            for linea in f:
                linea = linea.strip()
                if linea and not linea.startswith('#'):
                    clave, valor = linea.split('=', 1)
                    props[clave.strip()] = valor.strip()
    except IOError as e:
        print str(e)
        print "Error: No se pudo leer el archivo", fname        
    return props

    
def getOperationWkt(id):
    """Download the WKT2 definition of EPSG operation *id* from epsg.io.

    Scrapes the "Copy Formatted" clipboard attribute out of the HTML
    page and returns it with HTML entities unescaped.  Returns None
    when the page contains no WKT snippet.  Raises RuntimeError when
    epsg.io answers with an error page or the snippet is malformed
    (the original used bare `raise` with no active exception, which in
    Python 2 itself raises a confusing TypeError).
    """
    page = getdata("http://epsg.io/"+id)
    if page.startswith("error"):
        print "ERROR:",page
        raise RuntimeError("epsg.io returned an error page for id %s" % id)
    s1 = '<li><a id="s_wkt2_copyFormatted" class="clipboard" data-clipboard-text="'
    s2 = '">Copy Formatted</a></li>'
    n1 = page.find(s1)+len(s1)
    if n1<len(s1):
        # find() returned -1: the page has no WKT snippet at all.
        return None
    n2 = page[n1:].find(s2)+n1
    if n2<=n1:
        raise RuntimeError("unterminated WKT snippet on epsg.io page for id %s" % id)
    s = page[n1:n2]
    # The WKT lives inside an HTML attribute, so entities are escaped.
    return HTMLParser.HTMLParser().unescape(s)

def getOperationIdsFromAPI(apikey):
    # Ejemplo de llamada al api:
    #   https://api.maptiler.com/coordinates/search/deprecated:COORDOP%20kind:COORDOP.json?limit=50&offset=0&key={apikey}
    # Documentaciondel API:
    #   https://docs.maptiler.com/cloud/api/coordinates/#search-coordinate-systems-path-query
    # Pagina para la gestion de claves:
    #   https://cloud.maptiler.com/account/keys/
    #   Anadir al fichero .gvsig-devel.properties una entrada con:
    #   org.gvsig.crs.catalog.data.epsgio.apikey={apikey}
    #

    print "Use Maptiler API"
    print "Calculating operations page size"
    url = "https://api.maptiler.com/coordinates/search/deprecated:COORDOP%%20kind:COORDOP.json?key=%s&limit=%s&offset=%s" % (
        apikey,
        1,
        0
    )
    resp_s = getdata(url)
    resp = json.loads(resp_s)
    
    limit = 50
    num_operations = int(resp.get("total"))
    num_pages = int(math.ceil(float(num_operations)/limit))

    print "Total %s operations in %s pages of %s elements" % (num_operations,num_pages,limit)
    codes = list()    
    for cur_page in range(num_pages):
        print "\rDownload page %s/%s" % (cur_page,num_pages),
        sys.stdout.flush()
        url = "https://api.maptiler.com/coordinates/search/deprecated:COORDOP%%20kind:COORDOP.json?key=%s&limit=%s&offset=%s" % (
            key,
            limit,
            cur_page*limit
        )
        resp_s = getdata(url)
        resp = json.loads(resp_s)
        for op in resp["results"]:
            code = str(op["id"]["code"])
            codes.append(code)
        if cur_page % 40 == 39:
            time.sleep(4)
    print
    return codes
    
def getOperationIdsFromHTMLPage():    
    """Scrape epsg.io search-result pages for coordinate-operation codes.

    Fallback path used when no Maptiler API key is configured.  Walks
    the paginated HTML results, extracting each code from the
    '<a href="/CODE">' links inside '<ul class="results">', and stops
    when a freshly fetched page yields no more '<li>' entries before
    the closing '</ul>'.
    """
    print "Use WEP pages"
    cur_page = 1
    page = getdata("https://epsg.io/?q=%%20kind%%3ACOORDOP&page=%s" % cur_page)
    # The result entries live inside <ul class="results">; scan from there.
    n = page.find('<ul class="results">')
    s = page[n:]
    
    codes = list()
    while True:
        print "\rDownload page %s" % cur_page,
        n1 = s.find('<li>')
        n2 = s.find('</ul>')
        if n1<0 or n1>n2:
            # No <li> left before </ul>: current page is exhausted,
            # fetch the next page (2s pause to be polite to the server).
            cur_page+=1
            time.sleep(2)
            page = getdata("https://epsg.io/?q=%%20kind%%3ACOORDOP&page=%s" % cur_page)
            n = page.find('<ul class="results">')
            s = page[n:]
            n1 = s.find('<li>')
            n2 = s.find('</ul>')
            if n1<0 or n1>n2:
                # The new page has no results either: catalogue finished.
                break
            s = page[n:]
        entry = s[n1:]
        if entry.startswith('<li><a href="/'):        
            # NOTE(review): this branch advances past the entry WITHOUT
            # appending a code — presumably it skips entries whose link
            # immediately follows '<li>', but it looks like it could
            # also drop valid codes; confirm against the live markup.
            s = entry[10:]
        else:
            # Jump past '<a href="/' (10 chars) and take everything up
            # to the closing quote as the operation code.
            n = entry.find('<a href="/')
            s = entry[n+10:]
            n = s.find('"')
            x = s[:n]
            codes.append(x)
    print 
    return codes


def getOperationIds():
    """Return the list of EPSG coordinate-operation codes.

    Uses the Maptiler API when an apikey is configured in
    ~/.gvsig-devel.properties (key
    org.gvsig.crs.catalog.data.epsgio.apikey); otherwise falls back to
    scraping the epsg.io HTML search pages.
    """
    props = getDevelProperties()
    apikey = props.get("org.gvsig.crs.catalog.data.epsgio.apikey", None)
    # Idiom fix: compare against None with `is`, not `==`.
    if apikey is None:
        return getOperationIdsFromHTMLPage()
    return getOperationIdsFromAPI(apikey)

def main():
    datadir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))),"data","authorities")

    print "\nDownload transformations from epsg.io"
    authority = "epsg"
    authorityid = authority.lower()
    authdir = os.path.join(datadir,authorityid)
    tempops_f = os.path.join(tempfile.gettempdir(),"epsgio_ops.json")    
    transdir = os.path.join(authdir,"transformation")
    
    if not os.path.isdir(transdir):
        os.makedirs(transdir)

    trans_csv = open(os.path.join(authdir,"transformation.csv"),"wt")
    trans_csv.write("#id,description,sourcecrs,targetcrs,type,method\n");

    if os.path.isfile(tempops_f):
        print "Load operations list from cached file"
        s = readfile(tempops_f)
        operations = json.loads(s)
    else:            
        operations = getOperationIds()
        writefile(tempops_f,json.dumps(operations))
    count=0
    total = len(operations)
    print "Process operations"
    for code in operations:
        print "EPSG:%s (%s/%s)" % (code, count, total),
        count+=1
        wkt_file = os.path.join(transdir,code)+".wkt"
        if os.path.isfile(wkt_file) :
            print " from cache",
            wkt_s = readfile(wkt_file)
        else:
            if count % 40 == 0:
                time.sleep(0.5*60)
            print " download",
            wkt_s = getOperationWkt(code)            
            if wkt_s == None:
                print " not available"
                continue
            writefile(wkt_file,wkt_s)            
        print " parsing",
        try:
            wkt = parse_wkt(wkt_s)
        except:
            print " error parsing wkt"
            continue
        try:
            desc = wkt[0][0]
        except:
            print " error getting description",
            desc = ""
        try:
            source_id = wkt[0]["SOURCECRS"][0]["ID"]
            source_id = ("%s:%s" % (source_id[0],source_id[1]))
        except:
            print " error getting source",
            source_id = ""
        try:
            target_id = wkt[0]["TARGETCRS"][0]["ID"]
            target_id = ("%s:%s" % (target_id[0],target_id[1]))
        except:
            print " error getting target",
            target_id = ""
        try:
            method = wkt[0]["METHOD"][0]
        except:
            print " error getting method",
            method = ""
        try:
            type = wkt.getname(0)
        except:
            print " error getting type",
            type = ""
        try:
            line = "%s,%s,%s,%s,%s,%s\n" % (
                code.replace(",","\\x2c"),
                desc.replace(",","\\x2c"),
                source_id.replace(",","\\x2c"),
                target_id.replace(",","\\x2c"),
                type.replace(",","\\x2c"),
                method.replace(",","\\x2c"),
            )
        except:
            print " error formating record"
            continue
        print " ok"
        trans_csv.write(line.encode("utf-8"))
        trans_csv.flush()
        
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
    