#!/usr/bin/python3
#
# Generate a custom linker script/map file for the merged library built with --enable-mergelibs.
# This reduces the startup time and enables further optimisations with --enable-lto, because 60% or more
# of the symbols become internal only.
#

import subprocess
import re

# Symbol names exactly as the binary tools report them (still mangled).
exported_symbols1, imported_symbols1 = set(), set()
# The same two collections after decoding (demangling) the names.
exported_symbols2, imported_symbols2 = set(), set()


# Find all our shared libs. Note that all the libs that have been merged into the mergedlib library
# will not contain anything, those files are just empty.
#
# Every tool is started with an argument list instead of a shell command line, so that:
#  - the "*.so" pattern reaches find(1) verbatim (an unquoted glob in a shell string may be expanded
#    by the shell against the current directory before find ever sees it);
#  - library paths containing spaces or shell metacharacters are handed over intact.
# Both trees are given to a single find invocation, so an error while walking the first tree does
# not prevent the second one from being scanned.
# Each Popen is used as a context manager, which closes the pipe and waits for the child on exit.

# Exported symbols show up in the nm output as lines something like:
# 0000000000036ed0 T flash_component_getFactory
line_regex = re.compile(r'^[0-9a-fA-F]+ T ')

with subprocess.Popen(
        ["find", "./instdir", "./workdir/LinkTarget/CppunitTest", "-name", "*.so"],
        stdout=subprocess.PIPE) as subprocess_find:
    for line in subprocess_find.stdout:
        # find prints one path per line; keep it as bytes so no filename decoding is needed
        sharedlib = line.strip()
        # look for exported symbols
        with subprocess.Popen([b"nm", b"-D", sharedlib], stdout=subprocess.PIPE) as subprocess_nm:
            for line2_bytes in subprocess_nm.stdout:
                line2 = line2_bytes.strip().decode("utf-8")
                if line_regex.match(line2):
                    # columns are: address, type, symbol name
                    sym = line2.split(" ")[2].strip()
                    exported_symbols1.add(sym)
        # look for imported symbols
        with subprocess.Popen([b"objdump", b"-T", sharedlib],
                stdout=subprocess.PIPE) as subprocess_objdump:
            txt2 = subprocess_objdump.stdout
            # ignore the four lines of header bumf
            for _ in range(4):
                txt2.readline()
            # We are looking for lines something like:
            # 0000000000000000      DF *UND*  0000000000000000     _ZN16FilterConfigItem10WriteInt32ERKN3rtl8OUStringEi
            for line2_bytes in txt2:
                line2 = line2_bytes.strip().decode("utf-8")
                if "*UND*" not in line2:
                    continue
                # the symbol name is the last space-separated column
                sym = line2.split(" ")[-1].strip()
                imported_symbols1.add(sym)

# look for imported symbols in executables
#
# The tools are started with argument lists rather than shell strings, so the "*.bin" pattern
# reaches find(1) unexpanded and paths with unusual characters are passed through intact. Each
# Popen is used as a context manager so that its pipe is closed and the child is waited for.
with subprocess.Popen(["find", "./instdir", "-name", "*.bin"],
        stdout=subprocess.PIPE) as subprocess_find:
    for line in subprocess_find.stdout:
        executable = line.strip()
        # look for undefined (i.e. imported) symbols
        with subprocess.Popen([b"nm", b"-D", executable], stdout=subprocess.PIPE) as subprocess_nm:
            # We are looking for lines something like:
            # U sal_detail_deinitialize
            for line2_bytes in subprocess_nm.stdout:
                line2 = line2_bytes.strip().decode("utf-8")
                tokens = line2.split(" ")
                # Only accept lines whose type column really is "U"; this replaces the former
                # "grep -w U" and cannot be fooled by a defined symbol whose name contains
                # a stand-alone "U" word.
                if len(tokens) < 2 or tokens[0] != "U":
                    continue
                imported_symbols1.add(tokens[1])

# Now we have to symbolize before comparing because sometimes (due to thunks) two
# different encoded names symbolize to the same method/func name
#
def _demangle(sym):
    """Return the c++filt output for sym, with any leading thunk marker removed.

    Runs one c++filt process per symbol; subprocess.CalledProcessError is raised
    if c++filt exits with a non-zero status.
    """
    filtered_sym = subprocess.check_output(["c++filt", sym]).strip().decode("utf-8")
    for thunk_prefix in ("non-virtual thunk to ", "virtual thunk to "):
        if filtered_sym.startswith(thunk_prefix):
            return filtered_sym[len(thunk_prefix):]
    return filtered_sym


# The progress counter runs across both passes, matching progress_max_len which is the
# combined size of both sets, so the printed percentage climbs towards 100 exactly once.
progress = 0
progress_max_len = len(imported_symbols1) + len(exported_symbols1)
for encoded_symbols, decoded_symbols in ((imported_symbols1, imported_symbols2),
                                         (exported_symbols1, exported_symbols2)):
    for sym in encoded_symbols:
        progress += 1
        # report only every 128th symbol to keep the output short
        if progress % 128 == 0:
            print(str(int(progress * 100 / progress_max_len)) + "%")
        decoded_symbols.add(_demangle(sym))


# Exported names that no scanned binary imports.
unused_exports = exported_symbols2.difference(imported_symbols2)
for label, symbols in (
        ("exported       = ", exported_symbols2),
        ("imported       = ", imported_symbols2),
        ("unused_exports = ", unused_exports)):
    print(label + str(len(symbols)))

# For each class, count how many symbols will become hidden if we mark the class as hidden:
# every exported member adds one to its class, every imported member takes one away.
can_be_hidden_count = {}
for delta, symbols in ((1, exported_symbols2), (-1, imported_symbols2)):
    for sym in symbols:
        # everything before the last "::" is treated as the class name
        clz, separator, _member = sym.rpartition("::")
        if not separator:
            continue
        can_be_hidden_count[clz] = can_be_hidden_count.get(clz, 0) + delta
# keep only the classes with a positive balance, largest count first
can_be_hidden_list = sorted(
    ((cnt, clz) for clz, cnt in can_be_hidden_count.items() if cnt > 0),
    reverse=True)
with open("bin/find-mergedlib-can-be-private-symbols.classes.results", "wt") as f:
    for cnt, clz in can_be_hidden_list:
        # the list is in descending order, so nothing after this point reaches 10 either
        if cnt < 10:
            break
        f.write(str(cnt) + " " + clz + "\n")


# Filter out most of the noise.
# No idea where these are coming from, but not our code.
noise_prefixes = (
    "CERT_", "DER_", "FORM_", "FPDF", "HASH_", "Hunspell_", "LL_", "LP_", "LU", "MIP", "MPS",
    "NSS", "NSC_", "PK11", "PL_", "PQ", "PBE_", "PORT_", "PRP_", "PR_", "PT_", "QS_", "REPORT_",
    "RSA_", "SEC", "SGN", "SOS", "SSL_", "VFY_", "_PR_", "ber_", "bfp_", "ldap_", "ne_", "opj_",
    "pg_", "pq", "presolve_", "sqlite3_", "libepubgen::", "lucene::", "Hunspell::", "sk_", "_Z",
)
# dynamically loaded
dynamically_loaded_suffixes = ("get_implementation", "component_getFactory")
dynamically_loaded_names = (
    "CreateUnoWrapper",
    "ExportDOC",
    "ExportRTF",
    "GetSaveWarningOfMSVBAStorage_ww8",
    "GetSpecialCharsForEdit",
)
dynamically_loaded_prefixes = (
    "Import", "Java_com_sun_star_", "TestImport", "getAllCalendars_", "getAllCurrencies_",
    "getAllFormats", "getBreakIteratorRules_", "getCollationOptions_",
    "getCollatorImplementation_", "getContinuousNumberingLevels_",
    "getDateAcceptancePatterns_", "getForbiddenCharacters_", "getIndexAlgorithm_",
    "getLCInfo_", "getLocaleItem_", "getOutlineNumberingLevels_", "getReservedWords_",
    "getSTC_", "getSearchOptions_", "getTransliterations_", "getUnicodeScripts_", "lok_",
)
# UDK API
udk_api_prefixes = ("osl_", "rtl_", "typelib_", "typereg_", "uno_")

with open("bin/find-mergedlib-can-be-private-symbols.functions.results", "wt") as f:
    for sym in sorted(unused_exports):
        # str.startswith/endswith accept a tuple and match if any entry matches
        if sym.startswith(noise_prefixes):
            continue
        if sym.endswith(dynamically_loaded_suffixes):
            continue
        if sym in dynamically_loaded_names:
            continue
        if sym.startswith(dynamically_loaded_prefixes):
            continue
        if sym.startswith(udk_api_prefixes):
            continue
        f.write(sym + "\n")

