Primera versión operativa del script

2025-06-03 16:38:36 +02:00 · 2025-06-03 16:38:36 +02:00 · 333083c3c7
commit 333083c3c7
7 changed files with 315 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,5 @@
+__pycache__/
+*.pyc
+*.pyo
+.env
+venv/
--- a/DestinosParseados.xlsx
+++ b/DestinosParseados.xlsx
--- a/Vacantes.xlsx
+++ b/Vacantes.xlsx
--- a/cabs.json
+++ b/cabs.json
@ -0,0 +1,32 @@
+[
+  {
+    "nombre_cabecera": "PUESTO\nNÚMERO",
+    "n_campos": 1,
+    "campos": ["PUESTO NUMERO"]
+  },
+  {
+    "nombre_cabecera": "",
+    "n_campos": 1,
+    "campos": ["MINISTERIO / ORGANISMO"]
+  },
+  {
+    "nombre_cabecera": "CENTRO DIRECTIVO/00.A.A\nCENTRO DE DESTINO",
+    "n_campos": 2,
+    "campos": ["CENTRO DIRECTIVO/00.A.A", "CENTRO DE DESTINO"]
+  },
+  {
+    "nombre_cabecera": "PROVINCIA\nLOCALIDAD",
+    "n_campos": 2,
+    "campos": ["PROVINCIA","LOCALIDAD"]
+  },
+  {
+    "nombre_cabecera": "PUESTO DE TRABAJO",
+    "n_campos": 2,
+    "campos": [ "PUESTO DE TRABAJO", "RPT COD"]
+  },
+  {
+    "nombre_cabecera": "NIVEL C.D.\nC. ESPECÍFICO",
+    "n_campos": 2,
+    "campos": ["NIVEL C.D.", "C. ESPECÍFICO"]
+  }
+]
--- a/helpers.py
+++ b/helpers.py
@ -0,0 +1,40 @@
+import json
+
+class CabColumna:
+    def __init__(self, nombre_cabecera, n_campos):
+        self.nombre_cabecera = nombre_cabecera
+        self.n_campos = n_campos
+        self.indice=""
+        self.campos = ['' for _ in range(n_campos)]
+    def __repr__(self):
+        return f"CabColumna({self.nombre_cabecera!r}, {self.n_campos}, {self.campos})"
+
+
+class ListaCabColumnas:
+    def __init__(self):
+        self.lista = []
+
+    def añadir(self, cab_columna):
+        if isinstance(cab_columna, CabColumna):
+            self.lista.append(cab_columna)
+
+    def eliminar(self, index):
+        if 0 <= index < len(self.lista):
+            del self.lista[index]
+
+    def exportar(self, ruta):
+        with open(ruta, 'w', encoding='utf-8') as f:
+            json.dump([{
+                'nombre_cabecera': c.nombre_cabecera,
+                'n_campos': c.n_campos,
+                'campos': c.campos
+            } for c in self.lista], f, indent=2, ensure_ascii=False)
+
+    def importar(self, ruta):
+        with open(ruta, 'r', encoding='utf-8') as f:
+            datos = json.load(f)
+            self.lista = [
+                CabColumna(d['nombre_cabecera'], d['n_campos']) for d in datos
+            ]
+            for i, d in enumerate(datos):
+                self.lista[i].campos = d['campos']
--- a/main.py
+++ b/main.py
@ -0,0 +1,218 @@
+import os
+import curses
+import pandas as pd
+import numpy as np
+from colorama import init, Fore, Style
+
+from helpers import CabColumna, ListaCabColumnas
+
+# Initialise colorama with autoreset enabled
+init(autoreset=True)
+
+def limpiar_pantalla():
+    os.system('cls' if os.name == 'nt' else 'clear')
+
+def seleccionar_archivo_curses(stdscr):
+    curses.curs_set(0)  # Ocultar cursor
+    archivos = [f for f in os.listdir() if f.lower().endswith(('.xls', '.xlsx'))]
+    if not archivos:
+        stdscr.addstr(0, 0, "No se encontraron archivos .xls o .xlsx en el directorio actual.")
+        stdscr.getch()
+        return None
+
+    seleccion = 0
+
+    while True:
+        stdscr.clear()
+        stdscr.addstr(0, 0, "Selecciona un archivo Excel (.xls, .xlsx):\n\n")
+
+        for idx, archivo in enumerate(archivos):
+            if idx == seleccion:
+                stdscr.addstr(idx + 2, 0, f"> {archivo}", curses.A_REVERSE)
+            else:
+                stdscr.addstr(idx + 2, 0, f"  {archivo}")
+
+        stdscr.addstr(len(archivos) + 3, 0, "Usa ↑↓ para moverte, Enter para seleccionar, q para salir.")
+
+        tecla = stdscr.getch()
+
+        if tecla == curses.KEY_UP and seleccion > 0:
+            seleccion -= 1
+        elif tecla == curses.KEY_DOWN and seleccion < len(archivos) - 1:
+            seleccion += 1
+        elif tecla == ord('q'):
+            return None
+        elif tecla == 10:  # Enter
+            return os.path.abspath(archivos[seleccion])
+
+def seleccionar_archivo():
+    return curses.wrapper(seleccionar_archivo_curses)
+
+
+
+memoria_combinaciones = {}
+def elegir_combinacion(strtemp):
+    combinaciones = []
+
+    if len(strtemp) == 3:
+        combinaciones = [
+            (' '.join(strtemp[:1]), ' '.join(strtemp[1:])),
+            (' '.join(strtemp[:2]), strtemp[2])
+        ]
+    elif len(strtemp) == 4:
+        combinaciones = [
+            (' '.join(strtemp[:1]), ' '.join(strtemp[1:])),
+            (' '.join(strtemp[:2]), ' '.join(strtemp[2:])),
+            (' '.join(strtemp[:3]), strtemp[3])
+        ]
+    else:
+        return strtemp  # No intervenimos en otros casos
+
+    claves_posibles = [parte1 for parte1, parte2 in combinaciones]
+
+    for i, clave in enumerate(claves_posibles):
+        if clave in memoria_combinaciones:
+            return (clave, memoria_combinaciones[clave])
+
+    # Mostrar menú al usuario
+    print("\nSe detectaron múltiples líneas. Elige cómo dividirlas:")
+    for i, (parte1, parte2) in enumerate(combinaciones):
+        print(f"{i+1}) Parte 1: \"{parte1}\" | Parte 2: \"{parte2}\"")
+
+    while True:
+        opcion = input("Introduce el número de la opción deseada: ").strip()
+        if opcion.isdigit() and 1 <= int(opcion) <= len(combinaciones):
+            seleccion = combinaciones[int(opcion) - 1]
+            clave_general = seleccion[0]  # Solo recordamos la parte 1
+            memoria_combinaciones[clave_general] = seleccion[1]  # Guardamos parte 2 asociada
+            return seleccion
+        else:
+            print("Opción inválida. Intenta de nuevo.")
+
+def parsear_documento(archivo):
+    print("\n>>> Parsear documento")
+    print(f"Parseando archivo: {archivo} ...")
+
+    df = pd.read_excel(archivo, engine='openpyxl', header=None)
+    df = df.replace(r'^\s*$', np.nan, regex=True)
+    df = df.dropna(how='all').dropna(axis=1,how='all')
+
+    lista_cab_columnas=ListaCabColumnas()
+    lista_cab_columnas.importar('cabs.json')
+
+    cabeceras = []
+    ministerio = ''
+    for cabecera in lista_cab_columnas.lista:
+        if cabecera.nombre_cabecera!="": #
+            cabecera.indice=df.columns[df.apply(lambda col: col.astype(str).str.contains(cabecera.nombre_cabecera)).any()].tolist()[0]
+        cabeceras.extend(list(map(str, cabecera.campos)))
+    df_parseado=pd.DataFrame(columns=cabeceras)
+
+    for index, row in df.iterrows():
+        valor_celda = row.iloc[0]
+        if isinstance(valor_celda, str) and lista_cab_columnas.lista[0].nombre_cabecera not in valor_celda:
+            row=row.dropna()
+            ministerio = ' '.join(row.astype(str))
+        elif isinstance(valor_celda, str) and lista_cab_columnas.lista[0].nombre_cabecera in valor_celda:
+            continue
+        else:
+            fila_index = len(df_parseado)
+            for cabecera in lista_cab_columnas.lista:
+                if cabecera.nombre_cabecera!="": #
+                    if len(cabecera.campos)>1:
+                        strtemp = row[cabecera.indice].split('\n')
+                        if len(strtemp) in (3, 4):
+                            strtemp = elegir_combinacion(strtemp)
+                        else:
+                            strtemp = [' '.join(strtemp[:-1]), strtemp[-1]]
+
+                        for i, campo in enumerate(cabecera.campos):
+                            valor = strtemp[i] if i < len(strtemp) else ""
+                            df_parseado.at[fila_index, campo] = valor
+
+
+
+                    else:
+                        df_parseado.at[fila_index,cabecera.campos[0]] = str(row[cabecera.indice]).replace('\n',' ')
+                else:
+                    df_parseado.at[fila_index,cabecera.campos[0]] = ministerio      
+    input("\nPresiona Enter para continuar...")                   
+    return df_parseado
+
+def exportar_a_excel(documento_parseado):
+    print("\n>>> Exportar a Excel")
+    try:
+        documento_parseado.to_excel('DestinosParseados.xlsx', index=False)
+        print("Documento exportado a Excel.")
+    except Exception as e:
+        print(f"Error al exportar: {e}")
+    input("\nPresiona Enter para continuar...")
+
+def mostrar_menu(archivo, documento_parseado):
+    print("=== Menú Principal ===\n")
+
+    print("1) Seleccionar archivo")
+
+    if archivo:
+        print(f"2) Parsear documento {Fore.GREEN}{os.path.basename(archivo)}{Fore.RESET}")
+    else:
+        print(Fore.LIGHTBLACK_EX + "2) Parsear documento [desactivado]")
+
+    if archivo and documento_parseado is not None and not documento_parseado.empty:
+        print("3) Exportar a Excel")
+    else:
+        print(Fore.LIGHTBLACK_EX + "3) Exportar a Excel [desactivado]")
+
+    print("4) Modificar archivo ya parseado")
+
+    print("0) Salir")
+
+def main():
+    archivo = None
+    archivoParser = None
+    documento_parseado = None
+    while True:
+        limpiar_pantalla()
+        mostrar_menu(archivo, documento_parseado)
+        opcion = input("\nSeleccione una opción: ").strip()
+
+        if opcion == "1":
+            archivo = seleccionar_archivo()
+            if archivo:
+                print(f"\nArchivo seleccionado: {archivo}")
+            else:
+                print("\nNo se seleccionó ningún archivo.")
+            documento_parseado = None
+
+        elif opcion == "2":
+            if not archivo:
+                print(Fore.RED + "Opción desactivada: debes seleccionar un archivo primero.")
+            else:
+                documento_parseado = parsear_documento(archivo)
+
+        elif opcion == "3":
+            if not archivo:
+                print(Fore.RED + "Opción desactivada: primero selecciona un archivo.")
+            elif documento_parseado is None or documento_parseado.empty:
+                print(Fore.RED + "Opción desactivada: debes parsear el archivo antes de exportar.")
+            else:
+                exportar_a_excel(documento_parseado)
+
+        elif opcion =="4":
+            archivoParser = seleccionar_archivo()
+            if archivo:
+                print(f"\nArchivo seleccionado: {archivo}")
+            else:
+                print("\nNo se seleccionó ningún archivo.")
+            
+        elif opcion == "0":
+            print("\nSaliendo del programa...")
+            break
+
+        else:
+            print(Fore.RED + "Opción no válida.")
+            input("\nPresiona Enter para continuar...")
+
+
+# Start the menu only when this file is run as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,20 @@
+Bottleneck==1.5.0
+colorama==0.4.6
+defusedxml==0.7.1
+et_xmlfile==2.0.0
+llvmlite==0.44.0
+numba==0.61.2
+numexpr==2.10.2
+numpy==2.2.6
+odfpy==1.4.1
+openpyxl==3.1.5
+packaging==25.0
+pandas==2.2.3
+python-calamine==0.3.2
+python-dateutil==2.9.0.post0
+pytz==2025.2
+pyxlsb==1.0.10
+six==1.17.0
+tzdata==2025.2
+xlrd==2.0.1
+XlsxWriter==3.2.3