openlayer-ai
diff --git a/‎src/openlayer/lib/.keep
Lines changed: 0 additions & 4 deletions b/‎src/openlayer/lib/.keep
Lines changed: 0 additions & 4 deletions
diff --git a/‎src/openlayer/lib/__init__.py
Lines changed: 37 additions & 0 deletions b/‎src/openlayer/lib/__init__.py
Lines changed: 37 additions & 0 deletions
diff --git a/‎src/openlayer/lib/constants.py
Lines changed: 93 additions & 0 deletions b/‎src/openlayer/lib/constants.py
Lines changed: 93 additions & 0 deletions
diff --git a/‎src/openlayer/lib/core/__init__.py
Lines changed: 1 addition & 0 deletions b/‎src/openlayer/lib/core/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/openlayer/lib/core/base_model.py
Lines changed: 166 additions & 0 deletions b/‎src/openlayer/lib/core/base_model.py
Lines changed: 166 additions & 0 deletions
diff --git a/‎src/openlayer/lib/integrations/__init__.py b/‎src/openlayer/lib/integrations/__init__.py
@@ -0,0 +1,37 @@
+"""Openlayer lib.
+
+Exposes the tracing entry points (`trace`, `trace_openai` and
+`trace_openai_assistant_thread_run`) together with the `Openlayer` client and
+the `ConfigLlmData` type.
+"""
+
+# Public names of this package.
+__all__ = [
+    "trace",
+    "trace_openai",
+    "trace_openai_assistant_thread_run",
+    "Openlayer",
+    "ConfigLlmData",
+]
+
+# ---------------------------------- Tracing --------------------------------- #
+from .tracing import tracer
+from .._client import Openlayer
+from ..types.inference_pipelines.data_stream_params import ConfigLlmData
+
+# Package-level alias for `tracer.trace`.
+trace = tracer.trace
+
+
+def trace_openai(client):
+    """Trace OpenAI chat completions made through `client`.
+
+    Args:
+        client: An `openai.Client` or `openai.AzureOpenAI` instance.
+
+    Returns:
+        The result of `openai_tracer.trace_openai(client)`.
+
+    Raises:
+        ValueError: If `client` is not an instance of a supported client class.
+    """
+    # pylint: disable=import-outside-toplevel
+    import openai
+
+    from .integrations import openai_tracer
+
+    supported_clients = (openai.Client, openai.AzureOpenAI)
+    if isinstance(client, supported_clients):
+        return openai_tracer.trace_openai(client)
+    raise ValueError("Invalid client. Please provide an OpenAI client.")
+
+
+def trace_openai_assistant_thread_run(client, run):
+    """Trace a run of an OpenAI Assistant thread.
+
+    The OpenAI integration is imported lazily, and both arguments are forwarded
+    unchanged to `openai_tracer.trace_openai_assistant_thread_run`.
+
+    Args:
+        client: The client to forward to the integration.
+        run: The thread run to forward to the integration.
+
+    Returns:
+        Whatever the integration's function returns.
+    """
+    # pylint: disable=import-outside-toplevel
+    from .integrations import openai_tracer
+
+    delegate = openai_tracer.trace_openai_assistant_thread_run
+    return delegate(client, run)
@@ -0,0 +1,93 @@
+"""Constants used throughout the Openlayer SDK."""
+
+# --------------------------- LLM usage costs table -------------------------- #
+# Maps each OpenAI model name to its per-token "input" and "output" cost.
+# Last update: 2024-02-05
+OPENAI_COST_PER_TOKEN = {
+    "babbage-002": {"input": 0.0004e-3, "output": 0.0004e-3},
+    "davinci-002": {"input": 0.002e-3, "output": 0.002e-3},
+    "gpt-3.5-turbo": {"input": 0.0005e-3, "output": 0.0015e-3},
+    "gpt-3.5-turbo-0125": {"input": 0.0005e-3, "output": 0.0015e-3},
+    "gpt-3.5-turbo-0301": {"input": 0.0015e-3, "output": 0.002e-3},
+    "gpt-3.5-turbo-0613": {"input": 0.0015e-3, "output": 0.002e-3},
+    "gpt-3.5-turbo-1106": {"input": 0.001e-3, "output": 0.002e-3},
+    "gpt-3.5-turbo-16k-0613": {"input": 0.003e-3, "output": 0.004e-3},
+    "gpt-3.5-turbo-instruct": {"input": 0.0015e-3, "output": 0.002e-3},
+    "gpt-4": {"input": 0.03e-3, "output": 0.06e-3},
+    "gpt-4-turbo-preview": {"input": 0.01e-3, "output": 0.03e-3},
+    "gpt-4-0125-preview": {"input": 0.01e-3, "output": 0.03e-3},
+    "gpt-4-1106-preview": {"input": 0.01e-3, "output": 0.03e-3},
+    "gpt-4-0314": {"input": 0.03e-3, "output": 0.06e-3},
+    "gpt-4-1106-vision-preview": {"input": 0.01e-3, "output": 0.03e-3},
+    "gpt-4-32k": {"input": 0.06e-3, "output": 0.12e-3},
+    "gpt-4-32k-0314": {"input": 0.06e-3, "output": 0.12e-3},
+}
+# Maps each Azure OpenAI model name to its per-token "input" and "output" cost.
+# Last update: 2024-03-26
+AZURE_OPENAI_COST_PER_TOKEN = {
+    "babbage-002": {"input": 0.0004e-3, "output": 0.0004e-3},
+    "davinci-002": {"input": 0.002e-3, "output": 0.002e-3},
+    "gpt-35-turbo": {"input": 0.0005e-3, "output": 0.0015e-3},
+    "gpt-35-turbo-0125": {"input": 0.0005e-3, "output": 0.0015e-3},
+    "gpt-35-turbo-instruct": {"input": 0.0015e-3, "output": 0.002e-3},
+    "gpt-4-turbo": {"input": 0.01e-3, "output": 0.03e-3},
+    "gpt-4-turbo-vision": {"input": 0.01e-3, "output": 0.03e-3},
+    "gpt-4-8k": {"input": 0.03e-3, "output": 0.06e-3},
+    "gpt-4-32k": {"input": 0.06e-3, "output": 0.12e-3},
+}
@@ -0,0 +1 @@
+
@@ -0,0 +1,166 @@
+"""Base class for an Openlayer model."""
+
+import os
+import abc
+import json
+import time
+import inspect
+import argparse
+from typing import Any, Dict, Tuple
+from dataclasses import field, dataclass
+
+import pandas as pd
+
+from ..tracing import tracer
+
+
+@dataclass
+class RunReturn:
+    """The return type of the `run` method in the Openlayer model.
+
+    Attributes:
+        output: The output of the model.
+        other_fields: Any other fields that you want to log. Each instance
+            gets its own empty dict when none is supplied.
+    """
+
+    output: Any
+    other_fields: Dict[str, Any] = field(default_factory=dict)
+
+
+class OpenlayerModel(abc.ABC):
+    """Interface for the Openlayer model.
+
+    Your model's class should inherit from this class and implement either:
+    - the `run` method (which takes a single row of data as input and returns
+      a `RunReturn` object)
+    - the `run_batch_from_df` method (which takes a pandas DataFrame as input
+      and returns a tuple of a DataFrame and a config dict).
+
+    It is more conventional to implement the `run` method. Note that `run` is
+    declared abstract, so a subclass that only customizes `run_batch_from_df`
+    still has to define `run` before it can be instantiated.
+
+    Refer to Openlayer's templates for examples of how to implement this class.
+    """
+
+    def run_from_cli(self) -> None:
+        """Run the model from the command line.
+
+        Parses `--dataset-path` (required) and `--output-dir` (optional; falls
+        back to the current working directory) and forwards them to `batch`.
+        """
+        parser = argparse.ArgumentParser(description="Run data through a model.")
+        parser.add_argument(
+            "--dataset-path", type=str, required=True, help="Path to the dataset"
+        )
+        parser.add_argument(
+            "--output-dir",
+            type=str,
+            required=False,
+            # Without a default, omitting the flag hands None to os.makedirs,
+            # which raises a TypeError.
+            default=os.getcwd(),
+            help="Directory to dump the results in",
+        )
+
+        # Parse the arguments
+        args = parser.parse_args()
+
+        return self.batch(
+            dataset_path=args.dataset_path,
+            output_dir=args.output_dir,
+        )
+
+    def batch(self, dataset_path: str, output_dir: str) -> None:
+        """Read the dataset from a file, run the model on it and write the results.
+
+        Args:
+            dataset_path: Path to a `.csv` or `.json` dataset file.
+            output_dir: Directory that receives the output dataset and config.
+
+        Raises:
+            ValueError: If `dataset_path` does not end in `.csv` or `.json`.
+        """
+        # Load the dataset into a pandas DataFrame
+        if dataset_path.endswith(".csv"):
+            df = pd.read_csv(dataset_path)
+        elif dataset_path.endswith(".json"):
+            df = pd.read_json(dataset_path, orient="records")
+        else:
+            # Fail early with a clear message instead of hitting an unbound
+            # local variable further down.
+            raise ValueError(
+                f"Unsupported dataset format: {dataset_path!r}. "
+                "Please provide a '.csv' or '.json' file."
+            )
+
+        # Run the model over the DataFrame and persist the results
+        output_df, config = self.run_batch_from_df(df)
+        self.write_output_to_directory(output_df, config, output_dir)
+
+    def run_batch_from_df(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
+        """Run the model on every row of `df` and collect the results.
+
+        For each row, the columns whose names match parameters of `run` are
+        passed to `run` as keyword arguments. The returned `output` and
+        `other_fields` are written back into `df`, which is modified in place.
+        When the tracer reports a current trace, its steps and any latency,
+        cost and tokens values are stored for that row as well.
+
+        Args:
+            df: The dataset, one model input per row.
+
+        Returns:
+            The updated DataFrame and a config dict naming its columns.
+        """
+        # Ensure the 'output' column exists
+        if "output" not in df.columns:
+            df["output"] = None
+
+        # The parameters of 'run' decide which columns are model inputs
+        run_signature = inspect.signature(self.run)
+        accepted_params = run_signature.parameters
+
+        for index, row in df.iterrows():
+            # Keep only the columns that are valid parameters of 'run'
+            filtered_kwargs = {
+                k: v for k, v in row.to_dict().items() if k in accepted_params
+            }
+
+            output = self.run(**filtered_kwargs)
+
+            df.at[index, "output"] = output.output
+
+            for k, v in output.other_fields.items():
+                if k not in df.columns:
+                    df[k] = None
+                df.at[index, k] = v
+
+            trace = tracer.get_current_trace()
+            if trace:
+                processed_trace, _ = tracer.post_process_trace(trace_obj=trace)
+                # Create the column up front, as is done for 'output' and the
+                # other fields, so the steps land in a single cell.
+                if "steps" not in df.columns:
+                    df["steps"] = None
+                df.at[index, "steps"] = trace.to_dict()
+                for key in ("latency", "cost", "tokens"):
+                    if key in processed_trace:
+                        df.at[index, key] = processed_trace[key]
+
+        config = {
+            "outputColumnName": "output",
+            "inputVariableNames": list(accepted_params),
+            "metadata": {
+                "output_timestamp": time.time(),
+            },
+        }
+
+        # Advertise the optional columns only when they are present
+        optional_columns = (
+            ("latency", "latencyColumnName"),
+            ("cost", "costColumnName"),
+            ("tokens", "numOfTokenColumnName"),
+        )
+        for column, config_key in optional_columns:
+            if column in df.columns:
+                config[config_key] = column
+
+        return df, config
+
+    def write_output_to_directory(
+        self,
+        output_df: pd.DataFrame,
+        config: Dict[str, Any],
+        output_dir: str,
+        fmt: str = "json",
+    ):
+        """Write the output DataFrame and its config to `output_dir`.
+
+        The config is written to `config.json` and the DataFrame to
+        `dataset.<fmt>`. The directory is created if it does not exist.
+
+        Args:
+            output_df: The DataFrame to write.
+            config: The config dict to dump as JSON.
+            output_dir: Destination directory.
+            fmt: Either "csv" or "json".
+
+        Raises:
+            ValueError: If `fmt` is neither "csv" nor "json". Nothing is
+                written in that case.
+        """
+        # Validate first so an unsupported format leaves no partial output
+        if fmt not in ("csv", "json"):
+            raise ValueError("Unsupported format. Please choose 'csv' or 'json'.")
+
+        # Create the directory if it doesn't exist
+        os.makedirs(output_dir, exist_ok=True)
+
+        output_path = os.path.join(output_dir, f"dataset.{fmt}")
+
+        # Write the config to a json file
+        config_path = os.path.join(output_dir, "config.json")
+        with open(config_path, "w", encoding="utf-8") as f:
+            json.dump(config, f, indent=4)
+
+        # Write the DataFrame to the file based on the specified format
+        if fmt == "csv":
+            output_df.to_csv(output_path, index=False)
+        else:
+            output_df.to_json(output_path, orient="records", indent=4)
+
+        print(f"Output written to {output_path}")
+
+    @abc.abstractmethod
+    def run(self, **kwargs) -> RunReturn:
+        """Run the model on a single row of data and return the result.
+
+        `run_batch_from_df` calls this once per row, passing the columns whose
+        names match this method's parameters as keyword arguments, and reads
+        `output` and `other_fields` from the returned object.
+        """