-
Notifications
You must be signed in to change notification settings - Fork 13
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
made nessus subfolder and added script for fixing container license #320
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -157,7 +157,7 @@ def add_commit_push_security_md(repo_path, branch_name): | |
|
||
# Create a pull request | ||
def create_pull_request(repo_path, branch_name, default_branch): | ||
"""Create a pull request for the branch, attempt to add reviewers, and assign 'wz-gsa'.""" | ||
""f"Create a pull request for the branch, attempt to add reviewers, and assign '{ASSIGNEE}'.""" | ||
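There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. With the […] This should probably be either: […] or: […] (the reviewer's inline code snippets were not captured in this extract; the comment refers to the `""f"…"""` docstring line directly above). |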
||
original_dir = os.getcwd() # Save the current directory | ||
try: | ||
os.chdir(repo_path) # Change to the repo's directory | ||
|
@@ -195,7 +195,7 @@ def create_pull_request(repo_path, branch_name, default_branch): | |
|
||
if ( | ||
"Reviewers could not be requested" in result | ||
or "Assignee could not be added" in result | ||
or f"{ASSIGNEE} could not be added" in result | ||
): | ||
logging.warning( | ||
"Attempting to add 'cloud-gov-pages-operations' as a fallback reviewer." | ||
|
@@ -208,10 +208,10 @@ def create_pull_request(repo_path, branch_name, default_branch): | |
else: | ||
logging.info("Reviewer successfully added.") | ||
|
||
if "Assignee could not be added" in result: | ||
logging.error("Failed to add 'wz-gsa' as the assignee.") | ||
if f"{ASSIGNEE} could not be added" in result: | ||
logging.error(f"Failed to add '{ASSIGNEE}' as the assignee.") | ||
else: | ||
logging.info("'wz-gsa' successfully assigned to the PR.") | ||
logging.info(f"'{ASSIGNEE}' successfully assigned to the PR.") | ||
|
||
except Exception as e: | ||
logging.error( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import os | ||
import sys | ||
import requests | ||
import zipfile | ||
import io | ||
import argparse | ||
import logging | ||
from typing import List | ||
|
||
# Configure the root logger once at import time: INFO and above, with each
# record prefixed by its timestamp and severity.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
||
|
||
def get_excluded_files() -> List[str]:
    """Return the filename endings that are skipped while combining a repository.

    The entries are README and LICENSE variants, i.e. non-code files. A fresh
    list is created on every call, so callers may modify the result.
    """
    skipped_names = ("README.md", "README", "LICENSE", "LICENSE.txt")
    return list(skipped_names)
|
||
|
||
def is_excluded_file(file_path: str, excluded_files: List[str]) -> bool:
    """Tell whether ``file_path`` ends with any of the excluded endings.

    Args:
        file_path: Path of the file inside the repository archive.
        excluded_files: Filename endings that mark a file as excluded.

    Returns:
        True as soon as one ending matches the tail of ``file_path``;
        False when none does (including when the list is empty).
    """
    for ending in excluded_files:
        if file_path.endswith(ending):
            return True
    return False
|
||
|
||
def has_sufficient_content(file_content: str, min_line_count: int = 10) -> bool:
    """Report whether the text contains enough non-blank lines to be kept.

    Args:
        file_content: Full text of the file.
        min_line_count: Number of non-blank lines required.

    Returns:
        True when the count of lines that are not empty or whitespace-only
        is at least ``min_line_count``; False otherwise.
    """
    non_blank_total = sum(1 for row in file_content.split("\n") if row.strip())
    return non_blank_total >= min_line_count
|
||
|
||
def download_and_process_files(
    repo_url: str, output_file: str, branch_or_tag: str = "master"
):
    """
    Downloads a GitHub repository archive and combines its files into one text file.

    Each included file is written as a ``# File: <path>`` header followed by
    its content. Directory entries, excluded files, files that are not valid
    UTF-8 and files with too few non-empty lines are skipped.

    Args:
        repo_url: The URL of the GitHub repository.
        output_file: The path to the output text file where combined contents will be stored.
        branch_or_tag: The branch or tag to download from the repository.

    All download and archive errors are logged rather than raised.
    """
    excluded_files = get_excluded_files()
    # A trailing slash would otherwise produce ".../repo//archive/...".
    base_url = repo_url.rstrip("/")

    try:
        # Branch archives live under refs/heads and tag archives under
        # refs/tags; try the branch form first and fall back to the tag form
        # only when the branch does not exist.
        response = None
        for ref_kind in ("heads", "tags"):
            download_url = f"{base_url}/archive/refs/{ref_kind}/{branch_or_tag}.zip"
            # Without a timeout a stalled connection would block forever.
            response = requests.get(download_url, timeout=60)
            if response.status_code != 404:
                break
        response.raise_for_status()  # Raises HTTPError for bad requests (4XX or 5XX)

        with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
            with open(output_file, "w", encoding="utf-8") as outfile:
                for file_path in zip_file.namelist():
                    if file_path.endswith("/") or is_excluded_file(
                        file_path, excluded_files
                    ):
                        continue
                    try:
                        file_content = zip_file.read(file_path).decode("utf-8")
                    except UnicodeDecodeError:
                        # Binary members (images, archives, ...) must not
                        # abort the run and leave a truncated output file.
                        logging.debug(f"Skipping non-UTF-8 file: {file_path}")
                        continue
                    if has_sufficient_content(file_content):
                        outfile.write(f"# File: {file_path}\n{file_content}\n\n")

        logging.info(f"Combined source code saved to {output_file}")
    except requests.exceptions.HTTPError as e:
        logging.error(f"HTTP Error occurred: {e}")
    except requests.exceptions.RequestException as e:
        logging.error(f"Error downloading the file: {e}")
    except zipfile.BadZipFile:
        logging.error(
            "Error processing zip file: The downloaded file was not a valid zip file."
        )
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: combine one repository into "<repo>_combined.txt".
    parser = argparse.ArgumentParser(
        description="Download and process files from a GitHub repository."
    )
    parser.add_argument("repo_url", type=str, help="The URL of the GitHub repository")
    parser.add_argument(
        "--branch_or_tag",
        type=str,
        help="The branch or tag of the repository to download",
        default="master",
    )
    args = parser.parse_args()

    # Strip trailing slashes first: otherwise the last path segment is empty
    # and the output file would be named "_combined.txt".
    repo_url = args.repo_url.rstrip("/")
    output_file = f"{repo_url.split('/')[-1]}_combined.txt"
    download_and_process_files(repo_url, output_file, args.branch_or_tag)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's this for? It doesn't seem to be mentioned in the PR description or READMEs. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Its addition was unintentional on my part. Updating my commit and reverting this to draft. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
import os | ||
import sys | ||
import requests | ||
import zipfile | ||
import io | ||
import ast | ||
import argparse | ||
import logging | ||
from typing import List, Dict | ||
|
||
# Configure the root logger at import time so INFO and more severe records
# are emitted (logging's default message format is kept).
logging.basicConfig(
    level=logging.INFO,
)
|
||
|
||
def get_language_config() -> Dict[str, Dict]:
    """
    Returns the per-language filtering configuration.

    The result maps a language name to a dictionary with four keys:
    ``extensions`` (path endings that select files of that language),
    ``excluded_dirs`` (directory names whose contents are skipped),
    ``excluded_files`` (file names that are skipped) and ``test_indicators``
    (substrings whose presence in a file marks it as a test file).
    A new dictionary is built on every call.
    """
    return {
        "python": {
            "extensions": [".py", ".pyw"],
            "excluded_dirs": [
                "docs",
                "examples",
                "tests",
                "test",
                "scripts",
                "utils",
                "benchmarks",
                "__pycache__",
            ],
            "excluded_files": [
                "hubconf.py",
                "setup.py",
                ".github",
                ".gitignore",
                "LICENSE",
                "README",
                "stale.py",
                "gen-card-",
                "write_model_card",
            ],
            "test_indicators": [
                "import unittest",
                "import pytest",
                "from unittest",
                "from pytest",
            ],
        },
        "go": {
            "extensions": [".go"],
            "excluded_dirs": [
                "docs",
                "examples",
                "tests",
                "test",
                "scripts",
                "utils",
                "benchmarks",
                "vendor",
            ],
            "excluded_files": [
                "go.mod",
                "go.sum",
                "Makefile",
                ".github",
                ".gitignore",
                "LICENSE",
                "README",
            ],
            # Go import paths are quoted, so the unquoted "import testing"
            # never occurs in real Go source; it is kept for backward
            # compatibility and the quoted form is what actually matches.
            "test_indicators": ["import testing", 'import "testing"', "func Test"],
        },
        "terraform": {
            "extensions": [".tf", ".tfvars", ".hcl"],
            "excluded_dirs": ["examples", "tests", "docs"],
            "excluded_files": [".gitignore", "LICENSE", "README.md"],
            "test_indicators": [],
        },
        "docker": {
            "extensions": ["Dockerfile", ".dockerignore"],
            "excluded_dirs": [],
            "excluded_files": [".gitignore", "LICENSE", "README.md"],
            "test_indicators": [],
        },
        "bosh": {
            "extensions": [".yml"],
            "excluded_dirs": ["docs", "examples", "tests", "test"],
            "excluded_files": ["LICENSE", "README.md"],
            "test_indicators": [],
        },
        "cloudfoundry": {
            "extensions": [".yml"],
            "excluded_dirs": ["docs", "examples", "tests", "test"],
            "excluded_files": ["LICENSE", "README.md"],
            "test_indicators": [],
        },
    }
|
||
|
||
def is_file_type(file_path: str, extensions: List[str]) -> bool:
    """Return True when the path equals or ends with one of ``extensions``.

    Entries may be real extensions (".py") or whole file names ("Dockerfile").
    """
    for candidate in extensions:
        if file_path == candidate or file_path.endswith(candidate):
            return True
    return False
|
||
|
||
def is_excluded_file(
    file_path: str, excluded_dirs: List[str], excluded_files: List[str]
) -> bool:
    """Check if the file should be excluded based on directories or file names.

    Args:
        file_path: Slash-separated path of the file inside the archive.
        excluded_dirs: Directory names; a file anywhere below one of them
            is excluded.
        excluded_files: File-name patterns. A file is excluded when its base
            name equals or starts with a pattern (so "README" covers
            "README.md" and "gen-card-" covers "gen-card-foo.py"), or when a
            pattern names one of its parent directories (e.g. ".github").

    Returns:
        True if the file should be skipped, False otherwise.
    """
    if any(
        file_path.startswith(f"{ex_dir}/") or f"/{ex_dir}/" in file_path
        for ex_dir in excluded_dirs
    ):
        return True

    *parent_dirs, file_name = file_path.split("/")
    # Drop empty patterns: "" is a prefix of every name and would exclude
    # everything.
    patterns = tuple(pattern for pattern in excluded_files if pattern)
    # str.startswith with an empty tuple is False, so no patterns means no match.
    if file_name.startswith(patterns):
        return True
    return any(part in patterns for part in parent_dirs)
|
||
|
||
def has_test_indicators(content: str, indicators: List[str]) -> bool:
    """Return True when any of the indicator substrings occurs in ``content``.

    An empty ``indicators`` list never matches.
    """
    for marker in indicators:
        if marker in content:
            return True
    return False
|
||
|
||
def has_sufficient_content(file_content: str, min_line_count: int = 10) -> bool:
    """Report whether the text has at least ``min_line_count`` substantive lines.

    A line is substantive when, after stripping surrounding whitespace, it is
    non-empty and does not begin with "#" or "//".
    """
    substantive = 0
    for raw_line in file_content.split("\n"):
        text = raw_line.strip()
        if not text or text.startswith(("#", "//")):
            continue
        substantive += 1
    return substantive >= min_line_count
|
||
|
||
def remove_comments_and_docstrings(source: str) -> str:
    """Remove comments and docstrings from Python source code.

    The source is parsed to an AST and unparsed again. Comments are not part
    of the AST, so they disappear in the round trip. Docstrings and any other
    statement consisting solely of a string literal are dropped from the
    module and from every statement block.

    Args:
        source: Python source code.

    Returns:
        The regenerated source, or ``source`` unchanged when it cannot be
        parsed (the parse error is logged).
    """

    def is_bare_string(stmt: ast.AST) -> bool:
        # ast.Str was removed in Python 3.12; string literals are
        # ast.Constant nodes holding a str value.
        return (
            isinstance(stmt, ast.Expr)
            and isinstance(stmt.value, ast.Constant)
            and isinstance(stmt.value.value, str)
        )

    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError) as e:
        # ValueError covers sources containing null bytes on older Pythons.
        logging.error(f"Error parsing Python source: {e}")
        return source  # Return original source if it cannot be parsed

    for node in ast.walk(tree):
        for field in ("body", "orelse", "finalbody"):
            statements = getattr(node, field, None)
            # Lambda/IfExp also have a "body", but it is an expression, not a list.
            if not isinstance(statements, list) or not statements:
                continue
            kept = [stmt for stmt in statements if not is_bare_string(stmt)]
            if not kept and not isinstance(node, ast.Module):
                # A block that held only a docstring must not become empty,
                # or the unparsed code would be syntactically invalid.
                kept = [ast.Pass()]
            setattr(node, field, kept)
    return ast.unparse(tree)
|
||
|
||
def is_likely_useful_file(file_path: str, language: str) -> bool:
    """
    Returns True when the file is not excluded by the configuration of ``language``.

    The language's excluded directories and excluded file names are looked up
    and applied to ``file_path``; an unknown language raises KeyError.
    """
    settings = get_language_config()[language]
    excluded = is_excluded_file(
        file_path, settings["excluded_dirs"], settings["excluded_files"]
    )
    return not excluded
|
||
|
||
def download_and_process_files(
    repo_url: str,
    output_file: str,
    language: str,
    keep_comments: bool,
    branch_or_tag: str = "master",
):
    """Download and process files from a GitHub repository based on language settings.

    Files of the requested language are concatenated into ``output_file``,
    each preceded by a "File: <path>" comment line. Excluded files, test
    files, files that are not valid UTF-8 and files with too little content
    are skipped.

    Args:
        repo_url: The URL of the GitHub repository.
        output_file: Path of the combined text file to write.
        language: Key of the language configuration to apply.
        keep_comments: When False, comments and docstrings are stripped from
            Python files.
        branch_or_tag: The branch or tag to download.

    A failed download is logged; any unexpected error is logged and ends the
    process with exit status 1.
    """
    try:
        config = get_language_config()[language]
        # A trailing slash would otherwise produce ".../repo//archive/...".
        base_url = repo_url.rstrip("/")

        # Branch archives live under refs/heads and tag archives under
        # refs/tags; fall back to the tag form only when no such branch exists.
        response = None
        for ref_kind in ("heads", "tags"):
            download_url = f"{base_url}/archive/refs/{ref_kind}/{branch_or_tag}.zip"
            # Without a timeout a stalled connection would block forever.
            response = requests.get(download_url, timeout=60)
            if response.status_code != 404:
                break

        if response.status_code != 200:
            logging.error(
                f"Failed to download the repository. Status code: {response.status_code}"
            )
            return

        comment_tag = "//" if language == "go" else "#"
        # The context manager closes the archive even if processing fails.
        with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
            with open(output_file, "w", encoding="utf-8") as outfile:
                for file_path in zip_file.namelist():
                    if (
                        file_path.endswith("/")
                        or not is_file_type(file_path, config["extensions"])
                        or not is_likely_useful_file(file_path, language)
                    ):
                        continue
                    try:
                        file_content = zip_file.read(file_path).decode("utf-8")
                    except UnicodeDecodeError:
                        # One badly encoded file must not abort the whole run.
                        logging.warning(f"Skipping non-UTF-8 file: {file_path}")
                        continue

                    if has_test_indicators(
                        file_content, config["test_indicators"]
                    ) or not has_sufficient_content(file_content):
                        continue
                    if language == "python" and not keep_comments:
                        file_content = remove_comments_and_docstrings(file_content)

                    outfile.write(
                        f"{comment_tag} File: {file_path}\n{file_content}\n\n"
                    )
        logging.info(
            f"Combined {language.capitalize()} source code saved to {output_file}"
        )
    except Exception as e:
        logging.error(f"An error occurred: {e}")
        sys.exit(1)
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: combine one repository into "<repo>_<lang>.txt".
    parser = argparse.ArgumentParser(
        description="Download and process files from a GitHub repository."
    )
    parser.add_argument("repo_url", type=str, help="The URL of the GitHub repository")
    parser.add_argument(
        "--lang",
        type=str,
        choices=list(get_language_config()),
        default="python",
        help="The programming language of the repository",
    )
    parser.add_argument(
        "--keep-comments",
        action="store_true",
        help="Keep comments and docstrings in the source code (only applicable for Python)",
    )
    parser.add_argument(
        "--branch_or_tag",
        type=str,
        help="The branch or tag of the repository to download",
        default="master",
    )
    args = parser.parse_args()

    # Strip trailing slashes first: otherwise the last path segment is empty
    # and the output file would be named "_<lang>.txt".
    repo_url = args.repo_url.rstrip("/")
    output_file = f"{repo_url.split('/')[-1]}_{args.lang}.txt"
    download_and_process_files(
        repo_url, output_file, args.lang, args.keep_comments, args.branch_or_tag
    )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where is `ASSIGNEE` being set?