Skip to content

Commit d2c762c

Browse files
gustavocidornelas authored and whoseoyster committed
Fix part of pylint issues
1 parent cf4dd92 commit d2c762c

File tree

5 files changed

+42
-32
lines changed

5 files changed

+42
-32
lines changed

openlayer/constants.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
"""Module for storing constants used throughout the OpenLayer Python Client.
2+
"""
13
import os
24

35
# ---------------------------- Commit/staging flow --------------------------- #

openlayer/model_runners/ll_model_runners.py

Lines changed: 22 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
"""
55

66
import datetime
7-
import json
87
import logging
98
import warnings
109
from abc import ABC, abstractmethod
@@ -59,32 +58,33 @@ def run(
5958
"""Runs the input data through the model."""
6059
if self.in_memory:
6160
return self._run_in_memory(
62-
input_data_df=input_data,
61+
input_data=input_data,
6362
output_column_name=output_column_name,
6463
)
6564
else:
6665
return self._run_in_conda(
67-
input_data_df=input_data, output_column_name=output_column_name
66+
input_data=input_data, output_column_name=output_column_name
6867
)
6968

7069
def _run_in_memory(
7170
self,
72-
input_data_df: pd.DataFrame,
71+
input_data: pd.DataFrame,
7372
output_column_name: Optional[str] = None,
7473
) -> pd.DataFrame:
7574
"""Runs the input data through the model in memory and returns a pandas
7675
dataframe."""
7776
for output_df, _ in tqdm(
78-
self._run_in_memory_and_yield_progress(input_data_df, output_column_name),
79-
total=len(input_data_df),
77+
self._run_in_memory_and_yield_progress(input_data, output_column_name),
78+
total=len(input_data),
8079
colour="BLUE",
8180
):
8281
pass
82+
# pylint: disable=undefined-loop-variable
8383
return output_df
8484

8585
def _run_in_memory_and_yield_progress(
8686
self,
87-
input_data_df: pd.DataFrame,
87+
input_data: pd.DataFrame,
8888
output_column_name: Optional[str] = None,
8989
) -> Generator[Tuple[pd.DataFrame, float], None, None]:
9090
"""Runs the input data through the model in memory and yields the results
@@ -95,10 +95,10 @@ def _run_in_memory_and_yield_progress(
9595
timestamps = []
9696
run_exceptions = set()
9797
run_cost = 0
98-
total_rows = len(input_data_df)
98+
total_rows = len(input_data)
9999
current_row = 0
100100

101-
for _, input_data_row in input_data_df.iterrows():
101+
for _, input_data_row in input_data.iterrows():
102102
# Check if output column already has a value to avoid re-running
103103
if output_column_name and output_column_name in input_data_row:
104104
output_value = input_data_row[output_column_name]
@@ -149,6 +149,7 @@ def _run_single_input(self, input_data_row: pd.Series) -> Tuple[str, float, set]
149149
try:
150150
outputs = self._get_llm_output(llm_input)
151151
return outputs["output"], outputs["cost"], set()
152+
# pylint: disable=broad-except
152153
except Exception as exc:
153154
return None, 0, {exc}
154155

@@ -223,7 +224,7 @@ def _report_exceptions(self, exceptions: set) -> None:
223224
)
224225

225226
def _run_in_conda(
226-
self, input_data_df: pd.DataFrame, output_column_name: Optional[str] = None
227+
self, input_data: pd.DataFrame, output_column_name: Optional[str] = None
227228
) -> pd.DataFrame:
228229
"""Runs LLM prediction job in a conda environment."""
229230
raise NotImplementedError(
@@ -253,7 +254,7 @@ def run_and_yield_progress(
253254
"""Runs the input data through the model and yields progress."""
254255
if self.in_memory:
255256
yield from self._run_in_memory_and_yield_progress(
256-
input_data_df=input_data,
257+
input_data=input_data,
257258
output_column_name=output_column_name,
258259
)
259260
else:
@@ -376,7 +377,7 @@ def _initialize_llm(self):
376377
raise ValueError(
377378
"Cohere API key is invalid. Please pass a valid API key as the "
378379
f"keyword argument 'cohere_api_key' \n Error message: {e}"
379-
)
380+
) from e
380381
if self.model_config.get("model") is None:
381382
warnings.warn("No model specified. Defaulting to model 'command'.")
382383
if self.model_config.get("model_parameters") is None:
@@ -461,7 +462,7 @@ def _initialize_llm(self):
461462
raise ValueError(
462463
"OpenAI API key is invalid. Please pass a valid API key as the "
463464
f"keyword argument 'openai_api_key' \n Error message: {e}"
464-
)
465+
) from e
465466
if self.model_config.get("model") is None:
466467
warnings.warn("No model specified. Defaulting to model 'gpt-3.5-turbo'.")
467468
if self.model_config.get("model_parameters") is None:
@@ -539,12 +540,13 @@ def _initialize_llm(self):
539540
"""Initializes the self-hosted LL model."""
540541
# Check if API key is valid
541542
try:
542-
requests.get(self.url)
543+
# TODO: move request timeout to constants.py
544+
requests.get(self.url, timeout=10800)
543545
except Exception as e:
544546
raise ValueError(
545547
"URL is invalid. Please pass a valid URL as the "
546548
f"keyword argument 'url' \n Error message: {e}"
547-
)
549+
) from e
548550

549551
def _get_llm_input(self, injected_prompt: List[Dict[str, str]]) -> str:
550552
"""Prepares the input for the self-hosted LLM."""
@@ -572,7 +574,8 @@ def _make_request(self, llm_input: str) -> Dict[str, Any]:
572574
"Content-Type": "application/json",
573575
}
574576
data = {self.input_key: llm_input}
575-
response = requests.post(self.url, headers=headers, json=data)
577+
# TODO: move request timeout to constants.py
578+
response = requests.post(self.url, headers=headers, json=data, timeout=10800)
576579
if response.status_code == 200:
577580
response_data = response.json()[0]
578581
return response_data
@@ -592,4 +595,6 @@ class HuggingFaceModelRunner(SelfHostedLLModelRunner):
592595
"""Wraps LLMs hosted in HuggingFace."""
593596

594597
def __init__(self, url, api_key):
595-
super().__init__(url, api_key, input_key="inputs", output_key="generated_text")
598+
super().__init__(
599+
url=url, api_key=api_key, input_key="inputs", output_key="generated_text"
600+
)

openlayer/model_runners/tests/test_llm_runners.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -54,26 +54,30 @@
5454
)
5555

5656
# ----------------------------- Expected results ----------------------------- #
57+
# flake8: noqa: E501
5758
OPENAI_PROMPT = [
5859
*PROMPT[:-1],
5960
{
6061
"role": "user",
6162
"content": """description: A smartwatch with fitness tracking capabilities \n\nseed words: smart, fitness, health""",
6263
},
6364
]
64-
COHERE_PROMPT = """S: You are a helpful assistant.
65+
66+
# flake8: noqa: E501
67+
COHERE_PROMPT = """S: You are a helpful assistant.
6568
U: You will be provided with a product description and seed words, and your task is to generate a list
6669
of product names and provide a short description of the target customer for such product. The output
67-
must be a valid JSON with attributes `names` and `target_custommer`.
68-
A: Let\'s get started!
69-
U: Product description: \n description: A home milkshake maker \n seed words: fast, healthy, compact
70-
A: {\n "names": ["QuickBlend", "FitShake", "MiniMix"]\n "target_custommer": "College students that are into fitness and healthy living"\n}
71-
U: description: A smartwatch with fitness tracking capabilities \n\nseed words: smart, fitness, health
70+
must be a valid JSON with attributes `names` and `target_custommer`.
71+
A: Let\'s get started!
72+
U: Product description: \n description: A home milkshake maker \n seed words: fast, healthy, compact
73+
A: {\n "names": ["QuickBlend", "FitShake", "MiniMix"]\n "target_custommer": "College students that are into fitness and healthy living"\n}
74+
U: description: A smartwatch with fitness tracking capabilities \n\nseed words: smart, fitness, health
7275
A:"""
7376

77+
# flake8: noqa: E501
7478
ANTHROPIC_PROMPT = f"""{anthropic.HUMAN_PROMPT} You are a helpful assistant. {anthropic.HUMAN_PROMPT} You will be provided with a product description and seed words, and your task is to generate a list
7579
of product names and provide a short description of the target customer for such product. The output
76-
must be a valid JSON with attributes `names` and `target_custommer`. {anthropic.AI_PROMPT} Let\'s get started! {anthropic.HUMAN_PROMPT} Product description:
80+
must be a valid JSON with attributes `names` and `target_custommer`. {anthropic.AI_PROMPT} Let\'s get started! {anthropic.HUMAN_PROMPT} Product description:
7781
description: A home milkshake maker \n seed words: fast, healthy, compact {anthropic.AI_PROMPT} {{\n "names": ["QuickBlend", "FitShake", "MiniMix"]\n "target_custommer": "College students that are into fitness and healthy living"\n}} {anthropic.HUMAN_PROMPT} description: A smartwatch with fitness tracking capabilities \n\nseed words: smart, fitness, health {anthropic.AI_PROMPT}"""
7882

7983

openlayer/validators/commit_validators.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,9 @@ def _validate_bundle_state(self):
129129
)
130130

131131
# Check if flagged to compute the model outputs
132-
with open(f"{self.bundle_path}/commit.yaml", "r") as commit_file:
132+
with open(
133+
f"{self.bundle_path}/commit.yaml", "r", encoding="UTF-8"
134+
) as commit_file:
133135
commit = yaml.safe_load(commit_file)
134136
compute_outputs = commit.get("computeOutputs", False)
135137

@@ -262,7 +264,7 @@ def _validate_bundle_resources(self):
262264
if "model" in self._bundle_resources and not self._skip_model_validation:
263265
model_config_file_path = f"{self.bundle_path}/model/model_config.yaml"
264266
model_type = self.model_config.get("modelType")
265-
if model_type == "shell" or model_type == "api":
267+
if model_type in ("shell", "api"):
266268
model_validator = model_validators.get_validator(
267269
task_type=self.task_type,
268270
model_config_file_path=model_config_file_path,

openlayer/validators/model_validators.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -619,15 +619,12 @@ def get_validator(
619619

620620

621621
# --------------- Helper functions used by multiple validators --------------- #
622-
def dir_exceeds_size_limit(dir: str) -> bool:
622+
def dir_exceeds_size_limit(dir_path: str) -> bool:
623623
"""Checks whether the tar version of the directory exceeds the maximum limit."""
624624
with tempfile.TemporaryDirectory() as tmp_dir:
625625
tar_file_path = os.path.join(tmp_dir, "tarfile")
626626
with tarfile.open(tar_file_path, mode="w:gz") as tar:
627-
tar.add(dir, arcname=os.path.basename(dir))
627+
tar.add(dir_path, arcname=os.path.basename(dir_path))
628628
tar_file_size = os.path.getsize(tar_file_path)
629629

630-
if tar_file_size > constants.MAXIMUM_TAR_FILE_SIZE * 1024 * 1024:
631-
return True
632-
else:
633-
return False
630+
return tar_file_size > constants.MAXIMUM_TAR_FILE_SIZE * 1024 * 1024

0 commit comments

Comments (0)