# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# Directory that receives intermediate build artifacts (objects, skeleton,
# static libraries).
OUTPUT := .output
# Compiler used for the `-target bpf` objects.
CLANG ?= clang
# Cargo binary used to build the Rust static libraries (blazesym, demangle).
CARGO ?= $(shell which cargo)
# NOTE(review): AWK is not referenced anywhere else in this Makefile — confirm
# whether it is still needed.
AWK ?= awk
# Non-empty: compile with -O0 (instead of -O2) and define DEBUG.
DEBUG ?=
# Non-empty: build blazesym with cargo's debug profile instead of --release.
BLAZESYM_DEBUG ?=
# Packages (CentOS) needed for a static build of wprof:
#   - glibc-static
#   - elfutils-libelf-devel-static
#   - zlib-static
#   - libzstd-static
#
# Non-empty: pass --static when linking the wprof binary.
STATIC ?=
# Non-empty: add -flto=auto -ffat-lto-objects to the user-space compile flags.
LTO ?=

# Installation layout used by the `install` target: the wprof binary is copied
# into $(DESTDIR)$(bindir). DESTDIR is an optional staging prefix.
DESTDIR ?=
prefix ?= /usr/local
bindir ?= $(prefix)/bin

# Absolute locations of the dependency sources (libbpf, bpftool, blazesym and
# strobelight-libs live one directory up; demangle is local) and of the static
# libraries built from them under $(OUTPUT).
LIBBPF_SRC := $(abspath ../libbpf/src)
BPFTOOL_SRC := $(abspath ../bpftool/src)
LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
# Directory created for the bootstrap bpftool build; may be overridden.
BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
# Absolute form of BPFTOOL_OUTPUT, handed to the bpftool sub-make as OUTPUT=.
# It is derived from BPFTOOL_OUTPUT (rather than repeating its default) so
# that overriding BPFTOOL_OUTPUT keeps the directory the sub-make writes to in
# sync with the path where $(BPFTOOL) is expected.
BPFTOOL_OUTPUT_ABS ?= $(abspath $(BPFTOOL_OUTPUT))
BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool
LIBBLAZESYM_SRC := $(abspath ../blazesym/)
LIBBLAZESYM_INC := $(abspath $(LIBBLAZESYM_SRC)/capi/include)
LIBBLAZESYM_OBJ := $(abspath $(OUTPUT)/libblazesym_c.a)
LIBDEMANGLE_SRC := $(abspath demangle/)
LIBDEMANGLE_OBJ := $(abspath $(OUTPUT)/libdemangle_c.a)
STROBELIGHT_LIBS_SRC := $(abspath ../strobelight-libs)
# Architecture name derived from `uname -m`, with the machine name rewritten
# by the substitutions below (x86_64 -> x86, aarch64 -> arm64, ...). It selects
# the vmlinux.h directory and the __TARGET_ARCH_<arch> define for BPF compiles.
ARCH ?= $(shell uname -m | sed -e 's/x86_64/x86/'			\
			       -e 's/arm.*/arm/'			\
			       -e 's/aarch64/arm64/'			\
			       -e 's/ppc64le/powerpc/'			\
			       -e 's/mips.*/mips/'			\
			       -e 's/riscv64/riscv/'			\
			       -e 's/loongarch64/loongarch/')
# Per-architecture vmlinux.h used by the BPF compiles.
VMLINUX := ../vmlinux.h/include/$(ARCH)/vmlinux.h
# Build pystacks BPF code from strobelight-libs
# Main pystacks BPF source file.
PYSTACKS_BPF_SRC := $(STROBELIGHT_LIBS_SRC)/strobelight/bpf_lib/python/pystacks/pystacks.bpf.c
# Directory of the shared strobelight BPF helpers; every name listed in
# PYSTACKS_COMMON_HELPERS is compiled from <dir>/<name>.bpf.c.
PYSTACKS_COMMON_DIR := $(STROBELIGHT_LIBS_SRC)/strobelight/bpf_lib/common
PYSTACKS_COMMON_HELPERS := common bpf_read_helpers namespace_helpers pid_target_helpers pthread_helpers task_helpers
# Include root for strobelight-libs headers (also appended to INCLUDES).
PYSTACKS_INCLUDES := -I$(STROBELIGHT_LIBS_SRC)
# Define added to both the user-space compiles and wprof's own BPF compiles.
PYSTACKS_CFLAGS := -DWPROF_PYSTACKS
# Extra defines for the strobelight-libs BPF compiles only: __<machine>__
# (e.g. __x86_64__) and int8_t mapped to s8.
# NOTE(review): this uses the host `uname -m` rather than $(ARCH) — confirm
# that is intended when ARCH is overridden.
PYSTACKS_BPF_CFLAGS := -D__$(shell uname -m)__ -Dint8_t=s8

# Use our own libbpf API headers and Linux UAPI headers distributed with
# libbpf to avoid dependency on system-wide headers, which could be missing or
# outdated
INCLUDES := -I$(OUTPUT) -I../include -I../libbpf/include/uapi				\
	    -I../libbpf/include -I$(dir $(VMLINUX)) -I$(LIBBLAZESYM_INC) -I../usdt	\
			$(PYSTACKS_INCLUDES)
# User-overridable base flags; the optimisation level follows DEBUG.
CFLAGS ?= -g -O$(if $(DEBUG),0,2) -Wall
# Flags actually passed to $(CC): the user's CFLAGS/EXTRA_CFLAGS followed by
# the flags the build always adds, so overriding CFLAGS does not drop them.
ALL_CFLAGS := $(CFLAGS) $(EXTRA_CFLAGS) -fno-omit-frame-pointer				\
	      -Wno-c23-extensions -Wno-sign-compare -Wno-format-truncation		\
	      $(if $(LTO),-flto=auto -ffat-lto-objects) $(PYSTACKS_CFLAGS)		\
	      $(if $(DEBUG),-DDEBUG)
# Linker flags for the wprof binary: user-supplied flags first, then the
# system libraries that are always linked.
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS) -lrt -ldl -lpthread -lm

# Get Clang's default includes on this system. We'll explicitly add these dirs
# to the includes list when compiling with `-target bpf` because otherwise some
# architecture-specific dirs will be "missing" on some architectures/distros -
# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
# sys/cdefs.h etc. might be missing.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
#
# The sed script looks only at the lines between clang's
# "<...> search starts here:" and "End of search list." markers and prints each
# listed directory rewritten as "-idirafter <dir>".
CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \
	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')

# Output verbosity. By default recipes are silenced (Q = @) and each step
# prints a short "  TAG      target [extra]" line through $(call msg,TAG,target[,extra]),
# with the absolute $(OUTPUT) prefix stripped from the target name.
# Running with V=1 echoes the full commands and disables the short messages.
ifneq ($(V),1)
  Q = @
  msg = @printf '  %-8s %s%s\n' "$(1)" "$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" "$(if $(3), $(3))";
  MAKEFLAGS += --no-print-directory
else
  Q =
  msg =
endif

# $(call allow-override,VAR,VALUE)
# Assign VALUE to VAR unless VAR was supplied through the environment or on
# the make command line (its $(origin) contains "environment" or
# "command line"); in those cases the user's value is left untouched.
define allow-override
  $(if $(or $(findstring environment,$(origin $(1))),\
            $(findstring command line,$(origin $(1)))),,\
    $(eval $(1) = $(2)))
endef

# Default CC and LD to the $(CROSS_COMPILE)-prefixed tools unless the user
# provided their own.
$(call allow-override,CC,$(CROSS_COMPILE)cc)
$(call allow-override,LD,$(CROSS_COMPILE)ld)

# Default goal (first rule in the file): build the wprof binary.
.PHONY: all
all: wprof

# Create the output directories. Other rules list them as order-only
# prerequisites (after `|`) so they exist before anything is written to them.
$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
	$(call msg,MKDIR,$@)
	$(Q)mkdir -p $@

# Build libbpf
# Rebuilt whenever any libbpf .c/.h file or its Makefile changes. The recipe
# first fetches the git submodules if ../libbpf/src is missing, then runs
# libbpf's own `install` target as a static-only build with objects in
# $(OUTPUT)/libbpf and DESTDIR set to $(OUTPUT).
# INCLUDEDIR, LIBDIR and UAPIDIR are emptied — presumably so the installed
# files land directly under $(OUTPUT); confirm against libbpf's Makefile.
$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
	$(call msg,LIB,$@)
	$(Q) [ -d ../libbpf/src ] || git submodule update --init --recursive
	$(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1		      \
		    OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@)		      \
		    INCLUDEDIR= LIBDIR= UAPIDIR=			      \
		    EXTRA_CFLAGS="-g -O$(if $(DEBUG),0,2) -Wno-sign-compare"  \
		    install

# Build bpftool
# Builds bpftool's `bootstrap` target into $(BPFTOOL_OUTPUT_ABS), fetching the
# git submodules first if ../bpftool/src is missing. The rule has no source
# prerequisites, so an existing bpftool binary is never rebuilt.
# CFLAGS is cleared in the sub-make's environment and this Makefile's CFLAGS
# are passed as EXTRA_CFLAGS instead; ARCH and CROSS_COMPILE are emptied —
# presumably so that bpftool is built for the host; confirm against bpftool's
# Makefile.
$(BPFTOOL): | $(BPFTOOL_OUTPUT)
	$(call msg,BPFTOOL,$@)
	$(Q) [ -d ../bpftool/src ] || git submodule update --init --recursive
	$(Q)CFLAGS= EXTRA_CFLAGS="$(CFLAGS) -Wno-sign-compare" 			\
		$(MAKE) ARCH= CROSS_COMPILE=					\
			BPF_DIR=$(LIBBPF_SRC) OUTPUT=$(BPFTOOL_OUTPUT_ABS)/	\
			-C $(BPFTOOL_SRC) bootstrap

# Build blazesym
# The cargo output is a double-colon rule without prerequisites, so its recipe
# runs every time the target is considered; cargo itself decides whether
# anything needs rebuilding. BLAZESYM_DEBUG selects the debug profile,
# otherwise a --release build is made. The git submodules are fetched first if
# ../blazesym/src is missing.
$(LIBBLAZESYM_SRC)/target/$(if $(BLAZESYM_DEBUG),debug,release)/libblazesym_c.a::
	$(call msg,LIB,$@)
	$(Q) [ -d ../blazesym/src ] || git submodule update --init --recursive
	$(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --package=blazesym-c -q $(if $(BLAZESYM_DEBUG),,--release)

# Copy the cargo-built archive into $(OUTPUT). $< is the archive named in the
# prerequisite list, so the profile-dependent path is spelled out only once
# (same form as the demangle copy rule).
$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/$(if $(BLAZESYM_DEBUG),debug,release)/libblazesym_c.a | $(OUTPUT)
	$(Q)cp $< $@

# Build demangle shim
# Double-colon rule without prerequisites: the recipe runs every time the
# target is considered and cargo decides what actually needs rebuilding.
# Always a --release build.
$(LIBDEMANGLE_SRC)/target/release/libdemangle_c.a::
	$(call msg,LIB,$@)
	$(Q)cd $(LIBDEMANGLE_SRC) && $(CARGO) build --release -q

# Copy the cargo-built archive into $(OUTPUT).
$(LIBDEMANGLE_OBJ): $(LIBDEMANGLE_SRC)/target/release/libdemangle_c.a | $(OUTPUT)
	$(Q)cp $< $@

# Build BPF code
# Compile <name>.bpf.c with clang for the BPF target into a temporary
# <name>.tmp.bpf.o, then pass it through `bpftool gen object` to produce the
# final <name>.bpf.o.
#
# Every header in this directory is a prerequisite, so editing any of them
# rebuilds the BPF objects. This uses `*.h` because a `%` inside
# $(wildcard ...) is expanded when the Makefile is read and is not a pattern
# stem: `$(wildcard %.h)` matches nothing and adds no header dependencies.
# Only the .c prerequisite is passed to the compiler.
$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard *.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
	$(call msg,BPF,$@)
	$(Q)$(CLANG) -g -Wall -O2 -target bpf -mcpu=v4 -D__TARGET_ARCH_$(ARCH)			\
		     $(PYSTACKS_CFLAGS)								\
		     $(INCLUDES) $(CLANG_BPF_SYS_INCLUDES)		      			\
		     -c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
	$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)

# Explicit header dependencies shared by wprof's own BPF objects.
$(OUTPUT)/wprof.bpf.o: wprof.h wprof.bpf.h
$(OUTPUT)/utils.bpf.o: wprof.h wprof.bpf.h
$(OUTPUT)/scx.bpf.o: wprof.h wprof.bpf.h

# Shared recipe for the strobelight-libs BPF translation units: compile the
# first prerequisite ($<) for the BPF target into $@ using the pystacks-specific
# BPF defines.
define sl-bpf-compile
$(call msg,BPF,$@)
$(Q)$(CLANG) -g -Wall -O2 -target bpf -mcpu=v4 -D__TARGET_ARCH_$(ARCH) \
	$(PYSTACKS_BPF_CFLAGS) \
	$(INCLUDES) $(PYSTACKS_INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \
	-c $< -o $@
endef

# Compile strobelight-libs common BPF helpers
# (sl_<name>.tmp.bpf.o is built from $(PYSTACKS_COMMON_DIR)/<name>.bpf.c)
$(OUTPUT)/sl_%.tmp.bpf.o: $(PYSTACKS_COMMON_DIR)/%.bpf.c $(LIBBPF_OBJ) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
	$(sl-bpf-compile)

# Compile pystacks BPF
# (explicit rule: its source lives outside the common helper directory)
$(OUTPUT)/sl_pystacks.tmp.bpf.o: $(PYSTACKS_BPF_SRC) $(LIBBPF_OBJ) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
	$(sl-bpf-compile)

# Link pystacks + common BPF objects
# One temporary object per strobelight translation unit: pystacks first, then
# each common helper, all named $(OUTPUT)/sl_<name>.tmp.bpf.o.
SL_ALL_TMP_OBJS := $(addprefix $(OUTPUT)/sl_,$(addsuffix .tmp.bpf.o,pystacks $(PYSTACKS_COMMON_HELPERS)))

PYSTACKS_BPF_OBJS := $(OUTPUT)/pystacks.bpf.o

# Combine all temporary objects into the single pystacks BPF object.
$(PYSTACKS_BPF_OBJS): $(SL_ALL_TMP_OBJS) | $(BPFTOOL)
	$(call msg,BPF-LINK,$@)
	$(Q)$(BPFTOOL) gen object $@ $^

# Generate BPF skeletons
# All BPF objects (wprof, utils, scx and pystacks) are first combined into
# $(OUTPUT)/wprof-final.bpf.o, from which bpftool emits one skeleton header
# named wprof_bpf.
$(OUTPUT)/wprof.skel.h: $(addprefix $(OUTPUT)/,wprof.bpf.o utils.bpf.o scx.bpf.o) $(PYSTACKS_BPF_OBJS) | $(OUTPUT) $(BPFTOOL)
	$(call msg,GEN-SKEL,$@)
	$(Q)$(BPFTOOL) gen object $(@:.skel.h=-final.bpf.o) $^
	$(Q)$(BPFTOOL) gen skeleton $(@:.skel.h=-final.bpf.o) name wprof_bpf > $@

# C sources that make up the wprof binary.
WPROF_SRCS := wprof.c utils.c env.c protobuf.c data.c emit.c stacktrace.c pmu.c			\
	      pb_common.c pb_encode.c perfetto_trace.pb.c topology.c proc.c			\
	      requests.c cuda.c inject.c sys.c elf_utils.c merge.c persist.c			\
	      cupti_driver_cbid_map.gen.c cupti_runtime_cbid_map.gen.c				\
	      pystacks.c pydisc.c pysym.c pyline.c pytrace.c utrace_cfg.c utrace.c blobset.c
# Headers treated as prerequisites of every user-space object: a change to any
# of them rebuilds all objects.
WPROF_HDRS := wprof.h utils.h env.h protobuf.h data.h emit.h stacktrace.h topology.h pmu.h	\
	      proc.h requests.h cuda.h inj_common.h inject.h sys.h elf_utils.h cuda_data.h 	\
	      wprof_types.h demangle.h merge.h wevent.h persist.h				\
	      pystacks.h pyoffsets.h pydisc.h pysym.h pyline.h pytrace.h pytrace_data.h		\
	      utrace_cfg.h utrace.h strs.h blobset.h
# foo.c -> $(OUTPUT)/foo.o for every source above.
WPROF_OBJS := $(patsubst %.c,$(OUTPUT)/%.o,$(WPROF_SRCS))

# Sources and headers of the injection shared library (libwprofinj.so).
WPROFINJ_SRCS := inj_lib.c inj_strset.c inj_hashmap.c inj_cupti.c inj_pytrace.c inj_torch_profiler.c
WPROFINJ_HDRS := inj_common.h inj.h wprof_cupti.h pytrace_data.h
# foo.c -> $(OUTPUT)/foo.o for every injection-library source.
WPROFINJ_OBJS := $(addprefix $(OUTPUT)/,$(WPROFINJ_SRCS:.c=.o))

# Injection-library objects are linked into a shared object, so they are built
# with -fPIC and additionally get ../libbpf/src on their include path.
$(WPROFINJ_OBJS): INCLUDES += -I../libbpf/src
$(WPROFINJ_OBJS): ALL_CFLAGS += -fPIC

# Build user space object files
# Static pattern rule: $(OUTPUT)/<name>.o is compiled from <name>.c. Every
# object also depends on the generated skeleton and on all project headers;
# the source file is the first prerequisite, hence $<.
$(WPROF_OBJS) $(WPROFINJ_OBJS): $(OUTPUT)/%.o: %.c $(OUTPUT)/wprof.skel.h $(WPROF_HDRS) $(WPROFINJ_HDRS) | $(OUTPUT)
	$(call msg,CC,$@)
	$(Q)$(CC) $(ALL_CFLAGS) $(INCLUDES) -c $< -o $@

# Build injection shared library
# Links the injection objects into libwprofinj.so. The headers are listed as
# prerequisites only; just the object files are passed to the compiler driver.
$(OUTPUT)/libwprofinj.so: $(WPROFINJ_OBJS) $(WPROFINJ_HDRS) | $(OUTPUT)
	$(call msg,CC,$@)
	$(Q)$(CC) $(ALL_CFLAGS) -shared $(filter %.o,$^) -o $@

# Wrap libwprofinj.so as raw binary data inside a relocatable object so it can
# be linked into the wprof binary. With `-b binary` the linker names its
# symbols after the input path (non-alphanumeric characters become `_`), so
# `.output/libwprofinj.so` yields `_binary__output_libwprofinj_so_*`; the
# --defsym options add path-independent aliases. The hard-coded symbol names
# therefore rely on OUTPUT being `.output`.
$(OUTPUT)/libwprofinj_embed.o: $(OUTPUT)/libwprofinj.so
	$(call msg,LD,$@)
	$(Q)$(LD) -r -b binary $< -o $@								\
		--defsym libwprofinj_so_start=_binary__output_libwprofinj_so_start		\
		--defsym libwprofinj_so_end=_binary__output_libwprofinj_so_end			\
		--defsym libwprofinj_so_size=_binary__output_libwprofinj_so_size

# Wrap ../JSON_SCHEMA.md as raw binary data inside a relocatable object so it
# can be linked into the wprof binary. With `-b binary` the linker names its
# symbols after the input path (`../JSON_SCHEMA.md` becomes
# `_binary____JSON_SCHEMA_md_*`); the --defsym options add shorter aliases.
#
# $(OUTPUT) is an order-only prerequisite: this rule has no other prerequisite
# inside the output directory, so without it a parallel build on a clean tree
# could run the linker before the directory has been created.
$(OUTPUT)/json_schema_embed.o: ../JSON_SCHEMA.md | $(OUTPUT)
	$(call msg,LD,$@)
	$(Q)$(LD) -r -b binary $< -o $@								\
		--defsym json_schema_start=_binary____JSON_SCHEMA_md_start			\
		--defsym json_schema_end=_binary____JSON_SCHEMA_md_end

# Build application binary
# Links all user-space objects, the two embedded-data objects and the static
# libraries (libbpf, blazesym, demangle) into `wprof` in the current
# directory. Headers are prerequisites only; $(filter ...) passes just the
# .c/.a/.o prerequisites to the compiler driver. STATIC adds --static, and
# libelf, zlib and zstd are always linked.
wprof: $(WPROF_HDRS) $(WPROF_OBJS)								\
		$(OUTPUT)/libwprofinj_embed.o							\
		$(OUTPUT)/json_schema_embed.o							\
		$(LIBBPF_OBJ) $(LIBBLAZESYM_OBJ) $(LIBDEMANGLE_OBJ) | $(OUTPUT)
	$(call msg,BINARY,$@)
	$(Q)$(CC) $(ALL_CFLAGS) $(filter %.c %.a %.o,$^) $(ALL_LDFLAGS) $(if $(STATIC),--static) -lelf -lz -lzstd -o $@

# Remove object files, the injection library and the wprof binary. The rest of
# $(OUTPUT) (static libraries, bpftool, generated headers) is kept so the next
# build does not have to rebuild the dependencies.
.PHONY: clean
clean:
	$(call msg,CLEAN)
	$(Q)rm -rf $(OUTPUT)/*.o $(OUTPUT)/libwprofinj.* wprof

# Full cleanup: everything `clean` removes, plus the whole $(OUTPUT) directory
# and the cargo build directories of both Rust libraries (blazesym and the
# demangle shim), so that the next build starts from scratch.
.PHONY: clean-all
clean-all: clean
	$(call msg,CLEAN-ALL)
	$(Q)rm -rf $(OUTPUT) $(LIBBLAZESYM_SRC)/target $(LIBDEMANGLE_SRC)/target

# Delete a target file when its recipe fails, so a partially written output is
# never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Keep intermediate (.skel.h, .bpf.o, etc) targets: with no prerequisites,
# .SECONDARY applies to every target, so make never auto-deletes them.
.SECONDARY:

# Install the wprof binary into $(DESTDIR)$(bindir), creating the directory if
# needed. Declared phony so that a file or directory named `install` in the
# source tree cannot make this target look up to date.
.PHONY: install
install: wprof
	mkdir -p $(DESTDIR)$(bindir)
	cp -p wprof $(DESTDIR)$(bindir)/
