Source code for src.clapi.clapi

"""Clingo API module for organizing and processing course data.

.. autosummary::
    :nosignatures:

    process_course_data_clingo
    translate_range
    append_rules
    query_clingo
    process_honors_courses
    process_repeatable_courses
"""

import os
import re
import json
import sys
import shlex
import subprocess

from typing import Any, Dict, Iterable, List, Union, Tuple

from src.kg.knowledge_graph import KnowledgeBase, KnowledgeGraph
from src.utils.util import DependencyError, check_dependencies, timeit



[docs]
class ClingoSatistfiablityError(Exception):
    """Signals that a Clingo run reported ``UNSATISFIABLE``.

    Carries no extra state; the message passed to the constructor is the only payload.
    """




[docs]
class ClingoSyntaxError(Exception):
    """Signals a parsing/syntax problem in a Clingo file.

    Carries no extra state; the message passed to the constructor is the only payload.
    """




[docs]
@timeit
def process_course_data_clingo(
    json_file: Union[KnowledgeBase, KnowledgeGraph, str],
    output_file: str = None,
    repeatable_courses: List[Tuple[str, str, str]] = None,
) -> str:
    """Converts course data stored in a JSON file into a Clingo (``.lp``) file.

    Every course becomes an atom of the form:
    ``course(course_id, credits, "career", spring, fall).``

    The following integrity constraints are generated when the corresponding data is present:
        - Prerequisites (one constraint per prerequisite group):
            ``:- course(course_id, credits, "career", spring, fall), not course(prereq_id, _, "career", _, _), ...``
        - Corequisites (one constraint per corequisite):
            ``:- course(course_id, credits, "career", spring, fall), not course(coreq_id, _, "career", spring, fall).``

    Honors courses are appended as ``honors(course_id).`` atoms.

    NOTE:
        - If a :py:class:`~src.kg.knowledge_graph.KnowledgeBase` or :py:class:`~src.kg.knowledge_graph.KnowledgeGraph` object is passed, then the Clingo output filepath is updated in the object.
        - Antirequisite rules are intentionally NOT generated: they caused unsatisfiability errors in Clingo.
        - ``repeatable_courses`` is currently unused, because repeatable courses are not yet incorporated into the Clingo output.

    Usage example:
        >>> clingo_file = process_course_data_clingo(json_file="course_data.json")

    Args:
        json_file: Input JSON file (or :py:class:`~src.kg.knowledge_graph.KnowledgeBase` or :py:class:`~src.kg.knowledge_graph.KnowledgeGraph` object) to be converted to Clingo.
        output_file: Output filename. If not specified, the input path is reused with its file extension replaced by '.lp'. Defaults to None.
        repeatable_courses: List of tuples of other courses that are repeatable. Each tuple should have exactly 3 elements: course_id, times_repeatable, max_credits. Currently ignored. Defaults to None.

    Returns:
        Output Clingo knowledge base file path.
    """
    # Unwrap knowledge base/graph objects, remembering them so the output
    # path can be recorded on the object afterwards.
    if isinstance(json_file, (KnowledgeBase, KnowledgeGraph)):
        kg: Union[KnowledgeBase, KnowledgeGraph] = json_file
        json_file = kg.json
    else:
        kg = None

    if output_file is None:
        # Swap only the trailing extension. A plain str.replace(".json", ".lp")
        # would also rewrite directory names containing ".json", and would
        # return the input path unchanged (overwriting the input file) whenever
        # the input does not contain a lowercase ".json".
        output_file = os.path.splitext(json_file)[0] + ".lp"

    with open(json_file, "r") as file:
        data: Dict[str, Dict[str, Any]] = json.load(file)

    predicates: List[str] = []
    rules: List[str] = []

    for course_id, course_info in data.items():
        offered_spring: int = int(course_info.get("spring"))
        offered_fall: int = int(course_info.get("fall"))

        # Numeric credits are emitted as an integer; anything else is treated
        # as a textual range and converted to Clingo's ``a..b`` notation.
        raw_credits = course_info.get("Credits")
        course_credits: Union[int, str] = (
            int(raw_credits)
            if isinstance(raw_credits, (float, int))
            else translate_range(raw_credits)
        )

        career: str = course_info.get("Career")

        # Fully instantiated atom (without the trailing period); reused as the
        # first body literal of every constraint generated for this course.
        head: str = (
            f'course({course_id.lower()}, {course_credits}, "{career}", '
            f"{offered_spring}, {offered_fall})"
        )
        predicates.append(f"{head}.")

        # Generate prerequisite rules. A missing key is treated like "NONE"
        # instead of failing while iterating over None.
        prerequisites = course_info.get("Prerequisites")
        if prerequisites not in (None, "NONE"):
            rules.append(f"% Prerequisites for {course_id.upper()}")
            for prereq_group in prerequisites:
                if not prereq_group:
                    # An empty group would produce ":- course(...), ." which
                    # is not a valid Clingo rule.
                    continue
                group_conditions = ", ".join(
                    f'\n   not course({prereq.lower()}, _, "{career}", _, _)'
                    for prereq in prereq_group
                )
                rules.append(f":- {head}, {group_conditions}. \n")

        # Generate corequisite rules. The corequisite literal reuses this
        # course's spring/fall flags rather than wildcards.
        corequisites = course_info.get("Corequisites")
        if corequisites not in (None, "NONE"):
            rules.append(f"% Corequisites for {course_id.upper()}")
            for coreq_group in corequisites:
                for coreq in coreq_group:
                    rules.append(
                        f":- {head}, "
                        f'\n   not course({coreq.lower()}, _, "{career}", '
                        f"{offered_spring}, {offered_fall}). \n"
                    )

    # Process honors courses
    honors: List[str] = process_honors_courses(file_path=json_file)
    if honors:
        honors.insert(0, "\n% Honors Courses")

    # TODO: Add the output of process_repeatable_courses(json_file=json_file,
    #   other_courses=repeatable_courses) here (under a "% Repeatable Courses"
    #   header) once repeatable courses can be incorporated correctly in Clingo.

    # Write predicates and rules to file
    output = ["% Course atoms"] + predicates + honors + ["\n% Course Rules \n"] + rules
    _write_list_to_file(output, output_file)

    # Explicit None check: do not depend on the truthiness of the object.
    if kg is not None:
        kg.lp = output_file

    return output_file




[docs]
def translate_range(input_string: str) -> str:
    """Converts a number or a range of numbers in a string to a Clingo-compatible format.

    Fractional parts are dropped (everything after the first '.' of each number).
    The range separator may be a hyphen or an en dash, with or without surrounding
    whitespace. A string holding a single number is returned as that number.

    Usage example:
        >>> translate_range("0 - 9")
        '0..9'
        >>> translate_range("1.0-3.0")
        '1..3'
        >>> translate_range("3.0")
        '3'

    Args:
        input_string: Input string containing a number or a range of numbers (e.g. 0 - 9).

    Returns:
        Clingo-compatible range format (e.g. 0..9), or the single integer part when no range is given.

    Raises:
        ValueError: If the input string is empty or only whitespace.
    """
    text = input_string.strip()
    if not text:
        raise ValueError("Cannot translate an empty credit range.")

    # Split on '-' (or an en dash), tolerating any amount of surrounding whitespace
    parts = re.split(r"\s*[-\u2013]\s*", text)

    # Keep only the integer part of each number
    start = parts[0].split(".")[0]
    if len(parts) == 1:
        # Single value, not a range: there is no upper bound to append
        return start
    end = parts[1].split(".")[0]

    # Concatenate with '..' for clingo
    return f"{start}..{end}"



def _write_list_to_file(data_list: List[str], filename: str) -> None:
    """Writes each string of a list to a file, one entry per line.

    The file is opened in write mode, so any existing content is replaced.

    Args:
        data_list: Strings to write; a newline is appended to every entry.
        filename: Output filename.

    Returns:
        None
    """
    with open(filename, "w") as handle:
        handle.writelines(entry + "\n" for entry in data_list)



[docs]
def append_rules(file_list: List[str], output_file: str) -> Union[str, None]:
    """Concatenates the contents of multiple files into a single output file.
    Intended for use with Clingo ``.lp`` files.

    All input files are read before the output file is opened for writing.
    This means ``output_file`` may itself appear in ``file_list`` without its
    content being lost, and a failure while reading leaves any existing
    output file untouched.

    Args:
        file_list: Input list of files to be appended, in order.
        output_file: File to be created (or overwritten) with the appended contents.
            A newline is written after the content of each input file.

    Returns:
        Path to the output file, or None if a file could not be read or written
        (the error is printed).
    """
    try:
        # Read everything first: opening the output in "w" mode truncates it,
        # which would wipe an input that shares the same path.
        contents: List[str] = []
        for file_name in file_list:
            with open(file_name, "r") as infile:
                contents.append(infile.read())

        with open(output_file, "w") as outfile:
            for content in contents:
                outfile.write(content)
                # Separate the contents of different files with a newline
                outfile.write("\n")
    except (OSError, UnicodeError) as e:
        # Best-effort behaviour: report I/O and decoding problems and signal
        # failure with None instead of raising.
        print(f"An error occurred: {e}")
        return None
    return output_file




[docs]
@timeit
def query_clingo(
    knowledge: Union[KnowledgeBase, KnowledgeGraph, str],
    verbose: bool = False,
    num_models: int = None,
    configuration: str = "handy",
    parallel_mode: int = None,
    query: Union[str, Tuple[str]] = None,
) -> str:
    """Queries a Clingo knowledge base/graph file using a given query.

    The ``clingo`` executable is invoked as
    ``clingo [options] <knowledge file> <query files...>``; its standard output
    is printed and returned.

    WARNING:
        - This function assumes that ``clingo`` is installed and is accessible via the system path variable.

    NOTE:
        - The query must be included in the Clingo file.
        - Only standard output is captured and inspected.

    Args:
        knowledge: Input Clingo knowledge base/graph file (or :py:class:`~src.kg.knowledge_graph.KnowledgeBase` or :py:class:`~src.kg.knowledge_graph.KnowledgeGraph` object) to be queried.
        verbose: Passes ``-V`` to Clingo if set to ``True``. Defaults to ``False``.
        num_models: Number of models to generate (``-n``). Defaults to None.
        configuration: Clingo configuration (``--configuration``). Defaults to ``"handy"``.
        parallel_mode: Parallel mode, maximum number of threads (``--parallel-mode``). Defaults to None.
        query: Filepath, or tuple of filepaths, to query files (.lp files) to be passed to Clingo. Defaults to None.

    Returns:
        Query results (Clingo's standard output decoded as UTF-8).

    Raises:
        ValueError: If the query is not a string or a tuple of strings.
        ClingoSatistfiablityError: If the query returns ``UNSATISFIABLE``.
        ClingoSyntaxError: If the output contains ``ERROR``.
        SystemExit: If Clingo is not installed or added to the system path variable
            (the :py:class:`~src.utils.util.DependencyError` is printed and the process exits with status 1).
    """
    # Check if Clingo is installed
    try:
        check_dependencies(dependencies=("clingo",))
    except DependencyError as e:
        print(f"Error: {e}")
        sys.exit(1)

    # Unwrap KnowledgeBase/KnowledgeGraph objects to their Clingo file path
    if isinstance(knowledge, (KnowledgeBase, KnowledgeGraph)):
        knowledge = knowledge.lp

    # Normalize the query argument to a tuple of file paths
    if query is None:
        query_files: Tuple[str, ...] = ()
    elif isinstance(query, str):
        query_files = (query,)
    elif isinstance(query, tuple):
        query_files = query
    else:
        raise ValueError("Query must be a string or a tuple of strings.")

    # Build the command as an argument list. Every option is its own element,
    # so options can never run into each other (the string-concatenated form
    # produced e.g. "-V-n 5--configuration=handy"), and file paths containing
    # whitespace are passed through intact.
    command: List[str] = ["clingo"]
    if verbose:
        command.append("-V")
    if num_models is not None:
        command.extend(["-n", str(num_models)])
    if parallel_mode is not None:
        command.append(f"--parallel-mode={parallel_mode}")
    if configuration is not None:
        command.append(f"--configuration={configuration}")
    command.append(str(knowledge))
    command.extend(os.path.abspath(q) for q in query_files)

    # Run Clingo and wait for it to finish. The exit status is deliberately not
    # checked; clingo is understood to report solve results through non-zero
    # exit codes (see the clingo documentation).
    # NOTE(review): stderr is not captured. If clingo reports parse errors on
    #   stderr, the "ERROR" check below will not see them — confirm.
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    output: str = completed.stdout.decode("utf-8")

    # Check if the output contains 'UNSATISFIABLE'
    if "UNSATISFIABLE" in output:
        raise ClingoSatistfiablityError("The query returned UNSATISFIABLE.")
    elif "ERROR" in output:
        raise ClingoSyntaxError("There was a parsing/syntax error in the Clingo file.")

    print(output)

    return output




[docs]
def process_honors_courses(file_path: str) -> List[str]:
    """Collects honors courses from a JSON course file as ``honors(course_id).`` atoms.

    A course counts as an honors course when the word "honors" (case-insensitive)
    appears in its ``CourseTitle`` or in its ``Description``. Missing fields are
    treated as empty strings.

    NOTE:
        - Mainly intended for use with SBU CSE courses.

    Usage example:
        >>> process_honors_courses(file_path="cse_courses.json")

    Args:
        file_path: Path to the JSON file containing course data.

    Returns:
        List of honors atoms, with lowercased course IDs, in the order the courses appear in the file.
    """
    with open(file_path, "r") as handle:
        courses = json.load(handle)

    searched_fields = ("CourseTitle", "Description")

    def _mentions_honors(details) -> bool:
        # Lazily checks the title first, then the description
        return any(
            "honors" in details.get(field, "").lower() for field in searched_fields
        )

    return [
        f"honors({identifier.lower()})."
        for identifier, details in courses.items()
        if _mentions_honors(details)
    ]



# TODO:
#
# Each repeatable atom should carry complete information, with no '_' placeholders.
# e.g. repeatable(cse593, 6, 12).

[docs]
def process_repeatable_courses(
    json_file: str, other_courses: Iterable[Tuple[str, str, str]] = None
) -> List[str]:
    """Processes repeatable courses from a JSON file and writes them to a list in the form: ``repeatable(course_id, times_repeatable, max_credits)``.

    NOTE:
        - The input JSON file should contain course data.
        - The repeatable courses are identified based on the course description:
          the description must contain "repeat", and then either a recognised
          phrase ("more than twice" or "repeated once" -> 2 times,
          "repeated twice" -> 3 times) or, failing that, an "<N> credits" amount.
          Descriptions matching neither are skipped.
        - The function also accepts a list of other courses that are repeatable.
          A course supplied this way is not added a second time from its description.
        - Course IDs are lowercased in the generated atoms.

    Usage example:
        >>> process_repeatable_courses(json_file="course_data.json", other_courses=[("cse593", "_", "_")])
        ['repeatable(cse390, 2, _).',
         'repeatable(cse391, 2, _).',
         'repeatable(cse488, _, 12).',
         .
         .
         .
         'repeatable(cse593, _, _).',
         .
         .
         .
         'repeatable(cse693, 2, _).']

    Args:
        json_file: Input JSON file containing course data.
        other_courses: Iterable of tuples of other courses that are repeatable. Each tuple should have exactly 3 elements: course_id, times_repeatable, max_credits. Courses not present in the JSON file are reported and skipped. Defaults to None.

    Raises:
        ValueError: If a course tuple is empty or does not have exactly 3 elements, or if any of the 2nd or 3rd elements of the tuple do not contain an integer or the string "_".

    Returns:
        Sorted list of repeatable courses.
    """
    with open(json_file, "r") as file:
        data: Dict[str, Dict[str, Any]] = json.load(file)

    atoms: List[str] = []
    # Lowercased IDs of courses added from ``other_courses``; used to avoid
    # emitting a second atom for the same course from its description.
    listed_ids = set()

    # Add other repeatable courses
    if other_courses is not None:
        for entry in other_courses:
            if not entry:
                raise ValueError("Each course tuple should have exactly 3 elements.")
            given_id = entry[0]
            if given_id.upper() not in data:
                print(f"Course {given_id} not found in the JSON file.")
                continue
            if len(entry) != 3:
                raise ValueError("Each course tuple should have exactly 3 elements.")
            if not (
                _check_repeatable_input(entry[1]) and _check_repeatable_input(entry[2])
            ):
                raise ValueError(
                    'The 2nd and 3rd elements of each course tuple should be an integer or "_".'
                )
            # Lowercase the ID so it matches the atoms generated below
            atoms.append(f"repeatable({given_id.lower()}, {entry[1]}, {entry[2]}).")
            listed_ids.add(given_id.lower())

    for course_id, course_info in data.items():
        # A missing (or null) description is treated as empty
        description = (course_info.get("Description") or "").lower()
        if "repeat" not in description or course_id.lower() in listed_ids:
            continue

        times = "_"
        max_credits = "_"
        if "more than twice" in description:
            times = 2
        elif "repeated once" in description:
            times = 2
        elif "repeated twice" in description:
            times = 3
        elif "credits" in description:
            extracted = _extract_credits(description)
            if extracted is None:
                continue
            max_credits = extracted
        else:
            # NOTE: Setting ``times = 2`` here instead of skipping would make 2
            #   the default. This behavior may not be desired in all cases.
            continue
        atoms.append(f"repeatable({course_id.lower()}, {times}, {max_credits}).")

    # Sort the atoms
    return sorted(atoms)



def _extract_credits(description: str) -> int:
    """Helper function that pulls a credit amount out of a course description.

    The first occurrence of digits followed by a single space and the word
    'credits' is used.

    Args:
        description: Course description string.

    Returns:
        Number of credits, or None when the description contains no such phrase.
    """
    found = re.search(r"(\d+) credits", description)
    return int(found.group(1)) if found else None


def _check_repeatable_input(course_info: str) -> bool:
    """Helper function that checks if the input for repeatable courses is valid.
    Valid input is either the string "_" or a value that ``int()`` accepts.

    Args:
        course_info: Input course information.

    Returns:
        True if the input is valid, False otherwise (including for values such
        as None that cannot be converted to an integer at all).
    """
    if course_info == "_":
        return True
    try:
        int(course_info)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric values such as None, which
        # are invalid input rather than a reason to crash.
        return False
    return True