Skip to content

Commit

Permalink
Pipeline fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
jakep-allenai committed Jan 30, 2025
1 parent 7fbbb57 commit cf6673c
Showing 1 changed file with 14 additions and 9 deletions.
23 changes: 14 additions & 9 deletions olmocr/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,10 +701,21 @@ def submit_beaker_job(args):

b.secret.write(f"{owner}-WEKA_ACCESS_KEY_ID", os.environ.get("WEKA_ACCESS_KEY_ID", ""), args.beaker_workspace)
b.secret.write(f"{owner}-WEKA_SECRET_ACCESS_KEY", os.environ.get("WEKA_SECRET_ACCESS_KEY", ""), args.beaker_workspace)
b.secret.write(f"{owner}-AWS_CREDENTIALS_FILE", open(os.path.join(os.path.expanduser("~"), ".aws", "credentials")).read(), args.beaker_workspace)
b.secret.write(
f"{owner}-AWS_CREDENTIALS_FILE",
open(os.path.join(os.path.expanduser("~"), ".aws", "credentials")).read(),
args.beaker_workspace,
)

env_var_secrets = [
EnvVar(name="WEKA_ACCESS_KEY_ID", secret=f"{owner}-WEKA_ACCESS_KEY_ID"),
EnvVar(name="WEKA_SECRET_ACCESS_KEY", secret=f"{owner}-WEKA_SECRET_ACCESS_KEY"),
EnvVar(name="AWS_CREDENTIALS_FILE", secret=f"{owner}-AWS_CREDENTIALS_FILE"),
]

try:
b.secret.get("OE_DATA_GCS_SA_KEY", args.beaker_workspace)
env_var_secrets.append(EnvVar(name="GOOGLE_APPLICATION_CREDENTIALS_FILE", secret="OE_DATA_GCS_SA_KEY"))
except SecretNotFound:
print("Input the olmo-gcs SA key if you would like to load weights from gcs (end with a double newline):")
lines = []
Expand All @@ -717,6 +728,7 @@ def submit_beaker_job(args):
gcs_sa_key = "\n".join(lines[:-1]).strip() # Remove the last empty line
if gcs_sa_key:
b.secret.write("OE_DATA_GCS_SA_KEY", gcs_sa_key, args.beaker_workspace)
env_var_secrets.append(EnvVar(name="GOOGLE_APPLICATION_CREDENTIALS_FILE", secret="OE_DATA_GCS_SA_KEY"))

# Create the experiment spec
experiment_spec = ExperimentSpec(
Expand All @@ -734,14 +746,7 @@ def submit_beaker_job(args):
),
image=ImageSource(beaker=beaker_image),
command=["python", "-m", "olmocr.beakerpipeline"] + args_list,
env_vars=[
EnvVar(name="BEAKER_JOB_NAME", value=task_name),
EnvVar(name="OWNER", value=owner),
EnvVar(name="WEKA_ACCESS_KEY_ID", secret=f"{owner}-WEKA_ACCESS_KEY_ID"),
EnvVar(name="WEKA_SECRET_ACCESS_KEY", secret=f"{owner}-WEKA_SECRET_ACCESS_KEY"),
EnvVar(name="AWS_CREDENTIALS_FILE", secret=f"{owner}-AWS_CREDENTIALS_FILE"),
EnvVar(name="GOOGLE_APPLICATION_CREDENTIALS_FILE", secret="OE_DATA_GCS_SA_KEY"),
],
env_vars=[EnvVar(name="BEAKER_JOB_NAME", value=task_name), EnvVar(name="OWNER", value=owner)] + env_var_secrets,
resources=TaskResources(gpu_count=1),
constraints=Constraints(cluster=args.beaker_cluster if isinstance(args.beaker_cluster, list) else [args.beaker_cluster]),
result=ResultSpec(path="/noop-results"),
Expand Down

0 comments on commit cf6673c

Please sign in to comment.