img2pdf/internal/convert/convert.go
stu f00ca45f59 Update .gitignore, README, and add build outputs and tooling
- Updated .gitignore to reflect current project structure
- Updated README with current build and installation instructions
- Added cmd, internal, and scripts directories for project organization
- Added build artifacts and installation scripts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-01 09:28:38 -05:00

220 lines
5.4 KiB
Go

package convert
import (
"bytes"
"fmt"
"image"
"image/color"
_ "image/gif"
"image/jpeg"
_ "image/png"
"sort"
"strconv"
"strings"
)
// SourceFile is a single uploaded image; ToPDF turns each one into its own
// PDF page.
type SourceFile struct {
	// Name is the file name. ToPDF orders pages by it and quotes it in
	// decode/encode error messages.
	Name string

	// Data is the raw, still-encoded image content handed to image.Decode.
	Data []byte
}
// ToPDF converts the given image files into a single PDF document using only
// standard library encoders.
//
// Every file becomes one page. Pages are ordered by file name using
// NaturalLess; files whose names compare equal keep the order in which they
// were passed in. The files slice itself is not modified: sorting happens on a
// private copy so the caller's ordering is preserved.
//
// It returns an error when files is empty, when a file cannot be decoded by
// image.Decode, or when re-encoding a decoded image as JPEG fails. Decode and
// encode errors are wrapped and prefixed with the offending file name.
func ToPDF(files []SourceFile) ([]byte, error) {
	if len(files) == 0 {
		return nil, fmt.Errorf("no images provided")
	}

	// Sort a copy rather than the argument: reordering the caller's slice is
	// a surprising side effect for a conversion function. The stable variant
	// keeps equally named files in their original relative order.
	ordered := make([]SourceFile, len(files))
	copy(ordered, files)
	sort.SliceStable(ordered, func(i, j int) bool {
		return NaturalLess(ordered[i].Name, ordered[j].Name)
	})

	pages := make([]pdfImage, 0, len(ordered))
	for _, f := range ordered {
		img, _, err := image.Decode(bytes.NewReader(f.Data))
		if err != nil {
			return nil, fmt.Errorf("decode %s: %w", f.Name, err)
		}
		jpegData, w, h, err := toJPEG(img)
		if err != nil {
			return nil, fmt.Errorf("encode %s: %w", f.Name, err)
		}
		pages = append(pages, pdfImage{
			Width:  w,
			Height: h,
			Data:   jpegData,
		})
	}
	return buildPDF(pages)
}
// toJPEG flattens any transparency in img onto an opaque white background and
// encodes the result as a JPEG at quality 90.
//
// It returns the encoded bytes followed by the image width and height in
// pixels (taken from img.Bounds()), or the error reported by jpeg.Encode.
func toJPEG(img image.Image) ([]byte, int, int, error) {
	b := img.Bounds()
	rgba := image.NewRGBA(b)

	// overWhite composites one alpha-premultiplied channel over white.
	// For premultiplied colour the "over" operator against an opaque white
	// backdrop reduces to ch + (255 - a). The sum is clamped in case a source
	// image carries channel values larger than its alpha.
	overWhite := func(ch, a uint8) uint8 {
		v := uint16(ch) + uint16(255-a)
		if v > 255 {
			return 255
		}
		return uint8(v)
	}

	for y := b.Min.Y; y < b.Max.Y; y++ {
		for x := b.Min.X; x < b.Max.X; x++ {
			// color.RGBA is alpha-premultiplied: the conversion has already
			// scaled R, G and B by alpha, so they must not be scaled again.
			c := color.RGBAModel.Convert(img.At(x, y)).(color.RGBA)
			if c.A < 255 {
				c.R = overWhite(c.R, c.A)
				c.G = overWhite(c.G, c.A)
				c.B = overWhite(c.B, c.A)
				c.A = 255
			}
			rgba.SetRGBA(x, y, c)
		}
	}

	var buf bytes.Buffer
	if err := jpeg.Encode(&buf, rgba, &jpeg.Options{Quality: 90}); err != nil {
		return nil, 0, 0, err
	}
	return buf.Bytes(), b.Dx(), b.Dy(), nil
}
// NaturalLess performs a natural, case-insensitive comparison of two strings
// and reports whether a sorts before b.
//
// Both strings are lower-cased and split into alternating runs of ASCII digits
// and non-digits. Runs are compared pairwise from the left:
//
//   - two digit runs are compared by numeric value, so "img2" < "img10";
//   - any other pair is compared as plain strings.
//
// Digit runs with the same value but different zero padding ("1" vs "01") are
// treated as equal and the comparison moves on to the next run, which keeps
// the ordering transitive. If one string runs out of runs first it sorts
// first. Strings that differ only by case or zero padding compare as equal
// (NaturalLess returns false in both directions).
func NaturalLess(a, b string) bool {
	as := splitNumeric(strings.ToLower(a))
	bs := splitNumeric(strings.ToLower(b))
	for i := 0; i < len(as) && i < len(bs); i++ {
		if as[i] == bs[i] {
			continue
		}
		ia, oka := toInt(as[i])
		ib, okb := toInt(bs[i])
		if !oka || !okb {
			return as[i] < bs[i]
		}
		if ia != ib {
			return ia < ib
		}
		// Same value, different padding: not decisive, look at later runs.
	}
	return len(as) < len(bs)
}

// splitNumeric cuts s into maximal runs that consist either entirely of ASCII
// digits or entirely of other characters, in their original order. The pieces
// are substrings of s, so concatenating them yields s again. An empty input
// yields a nil slice.
func splitNumeric(s string) []string {
	var parts []string
	start := 0
	prevDigit := false
	for i, r := range s {
		digit := r >= '0' && r <= '9'
		// A change of class at any rune but the first closes the current run.
		if i > 0 && digit != prevDigit {
			parts = append(parts, s[start:i])
			start = i
		}
		prevDigit = digit
	}
	if start < len(s) {
		parts = append(parts, s[start:])
	}
	return parts
}

// toInt parses s as a non-negative decimal number made only of ASCII digits.
//
// The boolean is false when s is empty or contains any other character. Values
// too large for int saturate at the largest int instead of wrapping around, so
// an oversized number never compares lower than a small one.
func toInt(s string) (int, bool) {
	if s == "" {
		return 0, false
	}
	const maxInt = int(^uint(0) >> 1)
	n := 0
	for _, r := range s {
		if r < '0' || r > '9' {
			return 0, false
		}
		d := int(r - '0')
		if n > (maxInt-d)/10 {
			// n*10+d would overflow; pin to the maximum but keep scanning so
			// a trailing non-digit is still rejected.
			n = maxInt
			continue
		}
		n = n*10 + d
	}
	return n, true
}
// pdfImage holds minimal data for embedding into a PDF.
type pdfImage struct {
	// Width and Height are the image dimensions in pixels. buildPDF also
	// uses them, unscaled, as the page size.
	Width  int
	Height int

	// Data is the encoded image stream. buildPDF embeds it verbatim and
	// declares it as /DCTDecode (JPEG) in /DeviceRGB with 8 bits per
	// component.
	Data []byte
}

// buildPDF writes a very small PDF with each image on its own page sized to the image.
//
// Object layout: object 1 is the catalog, object 2 the page tree, and every
// image contributes three consecutive objects (page, content stream, image
// XObject) starting at object 3. Because that numbering is fixed by the number
// of images, the page tree is written once, in place, before the pages.
//
// It returns an error when images is empty, or when an image has a
// non-positive dimension or no data.
func buildPDF(images []pdfImage) ([]byte, error) {
	if len(images) == 0 {
		return nil, fmt.Errorf("no images")
	}
	for i, img := range images {
		if img.Width <= 0 || img.Height <= 0 {
			return nil, fmt.Errorf("image %d: invalid size %dx%d", i, img.Width, img.Height)
		}
		if len(img.Data) == 0 {
			return nil, fmt.Errorf("image %d: empty data", i)
		}
	}

	const (
		firstPageObj = 3 // objects 1 and 2 are the catalog and the page tree
		objsPerImage = 3 // page, content stream, image XObject
	)

	var buf bytes.Buffer

	// offsets[n-1] is the byte offset of object n; it feeds the xref table.
	offsets := make([]int, 0, firstPageObj-1+objsPerImage*len(images))
	startObject := func() {
		offsets = append(offsets, buf.Len())
		buf.WriteString(strconv.Itoa(len(offsets)))
		buf.WriteString(" 0 obj\n")
	}
	addObject := func(body string) {
		startObject()
		buf.WriteString(body)
		buf.WriteString("\nendobj\n")
	}

	buf.WriteString("%PDF-1.4\n")
	// Comment line with four bytes >= 128: marks the file as binary so that
	// transfer tools do not treat it as text.
	buf.WriteString("%\xe2\xe3\xcf\xd3\n")

	addObject("<< /Type /Catalog /Pages 2 0 R >>")

	var kids strings.Builder
	for i := range images {
		kids.WriteString(fmt.Sprintf(" %d 0 R", firstPageObj+objsPerImage*i))
	}
	addObject(fmt.Sprintf("<< /Type /Pages /Count %d /Kids [%s ] >>", len(images), kids.String()))

	for i, img := range images {
		pageNum := firstPageObj + objsPerImage*i
		contentNum := pageNum + 1
		imageNum := pageNum + 2

		mediaBox := fmt.Sprintf("[0 0 %d %d]", img.Width, img.Height)
		resources := fmt.Sprintf("<< /XObject << /Im%d %d 0 R >> >>", i, imageNum)
		addObject(fmt.Sprintf("<< /Type /Page /Parent 2 0 R /MediaBox %s /Resources %s /Contents %d 0 R >>", mediaBox, resources, contentNum))

		// Scale the unit-square image up to the full page and paint it.
		addObject(streamObject(fmt.Sprintf("q %d 0 0 %d 0 0 cm /Im%d Do Q", img.Width, img.Height, i)))

		// The image object is written by hand so the raw bytes never pass
		// through a string conversion.
		startObject()
		buf.WriteString(fmt.Sprintf("<< /Type /XObject /Subtype /Image /Width %d /Height %d /ColorSpace /DeviceRGB /BitsPerComponent 8 /Filter /DCTDecode /Length %d >>\n", img.Width, img.Height, len(img.Data)))
		buf.WriteString("stream\n")
		buf.Write(img.Data)
		buf.WriteString("\nendstream\nendobj\n")
	}

	// Cross-reference table: one 20-byte entry per object, preceded by the
	// mandatory free entry for object 0.
	xrefPos := buf.Len()
	buf.WriteString("xref\n")
	buf.WriteString(fmt.Sprintf("0 %d\n", len(offsets)+1))
	buf.WriteString("0000000000 65535 f \n")
	for _, off := range offsets {
		buf.WriteString(fmt.Sprintf("%010d 00000 n \n", off))
	}

	buf.WriteString("trailer\n")
	buf.WriteString(fmt.Sprintf("<< /Size %d /Root 1 0 R >>\n", len(offsets)+1))
	buf.WriteString("startxref\n")
	buf.WriteString(strconv.Itoa(xrefPos))
	buf.WriteString("\n%%EOF\n")
	return buf.Bytes(), nil
}

// streamObject returns the body of a PDF stream object holding content: a
// dictionary carrying the byte length of content, followed by content wrapped
// in the stream/endstream keywords.
func streamObject(content string) string {
	return fmt.Sprintf("<< /Length %d >>\nstream\n%s\nendstream", len(content), content)
}