From f29ebc57d35210d0ae0abfdcfe438839e5e3b172 Mon Sep 17 00:00:00 2001 From: Moritz Marquardt Date: Wed, 1 Dec 2021 16:23:37 +0100 Subject: [PATCH 1/2] Handle certificate errors with mock certificates (fixes #10) --- certificates.go | 263 +++++++++++++++++++++++++++++++++--------------- main.go | 19 ++++ 2 files changed, 200 insertions(+), 82 deletions(-) diff --git a/certificates.go b/certificates.go index b7c152f..f85109d 100644 --- a/certificates.go +++ b/certificates.go @@ -6,20 +6,23 @@ import ( "crypto/ecdsa" "crypto/elliptic" "crypto/rand" + "crypto/rsa" "crypto/tls" "crypto/x509" + "crypto/x509/pkix" "encoding/gob" "encoding/json" + "encoding/pem" "errors" "github.com/OrlovEvgeny/go-mcache" "github.com/akrylysov/pogreb/fs" "github.com/go-acme/lego/v4/certificate" "github.com/go-acme/lego/v4/challenge" - "github.com/go-acme/lego/v4/challenge/resolver" "github.com/go-acme/lego/v4/challenge/tlsalpn01" "github.com/go-acme/lego/v4/providers/dns" "io/ioutil" "log" + "math/big" "os" "strings" "sync" @@ -127,7 +130,11 @@ var tlsConfig = &tls.Config{ } var keyCache = mcache.New() -var keyDatabase *pogreb.DB +var keyDatabase, keyDatabaseErr = pogreb.Open("key-database.pogreb", &pogreb.Options{ + BackgroundSyncInterval: 30 * time.Second, + BackgroundCompactionInterval: 6 * time.Hour, + FileSystem: fs.OSMMap, +}) func CheckUserLimit(user string) error { userLimit, ok := acmeClientCertificateLimitPerUser[user] @@ -162,18 +169,6 @@ func (u *AcmeAccount) GetPrivateKey() crypto.PrivateKey { return u.Key } -func newAcmeClient(configureChallenge func(*resolver.SolverManager) error) *lego.Client { - acmeClient, err := lego.NewClient(myAcmeConfig) - if err != nil { - panic(err) - } - err = configureChallenge(acmeClient.Challenge) - if err != nil { - panic(err) - } - return acmeClient -} - var acmeClient, mainDomainAcmeClient *lego.Client var acmeClientCertificateLimitPerUser = map[string]*equalizer.TokenBucket{} @@ -181,8 +176,8 @@ var acmeClientCertificateLimitPerUser = map[string]*equalizer.TokenBucket{} // TODO: when this is used a lot, we probably have to think of a somewhat better solution? var acmeClientOrderLimit = equalizer.NewTokenBucket(25, 15*time.Minute) -// rate limit is 20 / second, we want 10 / second -var acmeClientRequestLimit = equalizer.NewTokenBucket(10, 1*time.Second) +// rate limit is 20 / second, we want 5 / second (especially as one cert takes at least two requests) +var acmeClientRequestLimit = equalizer.NewTokenBucket(5, 1*time.Second) var challengeCache = mcache.New() @@ -277,16 +272,25 @@ func obtainCert(acmeClient *lego.Client, domains []string, renew *certificate.Re } defer obtainLocks.Delete(name) + if acmeClient == nil { + return mockCert(domains[0], "ACME client uninitialized. This is a server error, please report!"), nil + } + // request actual cert var res *certificate.Resource var err error - if renew != nil { + if renew != nil && renew.CertURL != "" { if os.Getenv("ACME_USE_RATE_LIMITS") != "false" { acmeClientRequestLimit.Take() } log.Printf("Renewing certificate for %v", domains) res, err = acmeClient.Certificate.Renew(*renew, true, false, "") - } else { + if err != nil { + log.Printf("Couldn't renew certificate for %v, trying to request a new one: %s", domains, err) + res = nil + } + } + if res == nil { if user != "" { if err := CheckUserLimit(user); err != nil { return tls.Certificate{}, err @@ -306,7 +310,7 @@ func obtainCert(acmeClient *lego.Client, domains []string, renew *certificate.Re } if err != nil { log.Printf("Couldn't obtain certificate for %v: %s", domains, err) - return tls.Certificate{}, err + return mockCert(domains[0], err.Error()), err } log.Printf("Obtained certificate for %v", domains) @@ -317,11 +321,6 @@ func obtainCert(acmeClient *lego.Client, domains []string, renew *certificate.Re panic(err) } err = keyDatabase.Put([]byte(name), resGob.Bytes()) - if err != nil { - _ = keyDatabase.Delete([]byte(name + "/key")) - obtainLocks.Delete(name) - panic(err) - } tlsCertificate, err := tls.X509KeyPair(res.Certificate, res.PrivateKey) if err != nil { @@ -330,21 +329,97 @@ func obtainCert(acmeClient *lego.Client, domains []string, renew *certificate.Re return tlsCertificate, nil } -func setupCertificates() { - var err error - keyDatabase, err = pogreb.Open("key-database.pogreb", &pogreb.Options{ - BackgroundSyncInterval: 30 * time.Second, - BackgroundCompactionInterval: 6 * time.Hour, - FileSystem: fs.OSMMap, - }) +func mockCert(domain string, msg string) tls.Certificate { + key, err := certcrypto.GeneratePrivateKey(certcrypto.RSA2048) if err != nil { panic(err) } + template := x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{ + CommonName: domain, + Organization: []string{"Codeberg Pages Error Certificate (couldn't obtain ACME certificate)"}, + OrganizationalUnit: []string{ + "Will not try again for 6 hours to avoid hitting rate limits for your domain.", + "Check https://docs.codeberg.org/codeberg-pages/troubleshooting/ for troubleshooting tips, and feel " + + "free to create an issue at https://codeberg.org/Codeberg/pages-server if you can't solve it.\n", + "Error message: " + msg, + }, + }, + + // certificates younger than 7 days are renewed, so this enforces the cert to not be renewed for a 6 hours + NotAfter: time.Now().Add(time.Hour * 24 * 7 + time.Hour * 6), + NotBefore: time.Now(), + + KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + BasicConstraintsValid: true, + } + certBytes, err := x509.CreateCertificate( + rand.Reader, + &template, + &template, + &key.(*rsa.PrivateKey).PublicKey, + key, + ) + if err != nil { + panic(err) + } + + out := &bytes.Buffer{} + err = pem.Encode(out, &pem.Block{ + Bytes: certBytes, + Type: "CERTIFICATE", + }) + if err != nil { + panic(err) + } + outBytes := out.Bytes() + res := &certificate.Resource{ + PrivateKey: certcrypto.PEMEncode(key), + Certificate: outBytes, + IssuerCertificate: outBytes, + Domain: domain, + } + var resGob bytes.Buffer + resEnc := gob.NewEncoder(&resGob) + err = resEnc.Encode(res) + if err != nil { + panic(err) + } + databaseName := domain + if domain == "*" + string(MainDomainSuffix) || domain == string(MainDomainSuffix[1:]) { + databaseName = string(MainDomainSuffix) + } + err = keyDatabase.Put([]byte(databaseName), resGob.Bytes()) + if err != nil { + panic(err) + } + + tlsCertificate, err := tls.X509KeyPair(res.Certificate, res.PrivateKey) + if err != nil { + panic(err) + } + return tlsCertificate +} + +func setupCertificates() { + if keyDatabaseErr != nil { + panic(keyDatabaseErr) + } + if os.Getenv("ACME_ACCEPT_TERMS") != "true" || (os.Getenv("DNS_PROVIDER") == "" && os.Getenv("ACME_API") != "https://acme.mock.directory") { panic(errors.New("you must set ACME_ACCEPT_TERMS and DNS_PROVIDER, unless ACME_API is set to https://acme.mock.directory")) } + // getting main cert before ACME account so that we can panic here on database failure without hitting rate limits + mainCertBytes, err := keyDatabase.Get(MainDomainSuffix) + if err != nil { + // key database is not working + panic(err) + } + if account, err := ioutil.ReadFile("acme-account.json"); err == nil { err = json.Unmarshal(account, &myAcmeAccount) if err != nil { @@ -357,7 +432,10 @@ func setupCertificates() { myAcmeConfig = lego.NewConfig(&myAcmeAccount) myAcmeConfig.CADirURL = envOr("ACME_API", "https://acme-v02.api.letsencrypt.org/directory") myAcmeConfig.Certificate.KeyType = certcrypto.RSA2048 - newAcmeClient(func(manager *resolver.SolverManager) error { return nil }) + _, err := lego.NewClient(myAcmeConfig) + if err != nil { + log.Printf("[ERROR] Can't create ACME client, continuing with mock certs only: %s", err) + } } else if os.IsNotExist(err) { privateKey, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) if err != nil { @@ -371,69 +449,90 @@ func setupCertificates() { myAcmeConfig = lego.NewConfig(&myAcmeAccount) myAcmeConfig.CADirURL = envOr("ACME_API", "https://acme-v02.api.letsencrypt.org/directory") myAcmeConfig.Certificate.KeyType = certcrypto.RSA2048 - tempClient := newAcmeClient(func(manager *resolver.SolverManager) error { return nil }) - - // accept terms & log in to EAB - if os.Getenv("ACME_EAB_KID") == "" || os.Getenv("ACME_EAB_HMAC") == "" { - reg, err := tempClient.Registration.Register(registration.RegisterOptions{TermsOfServiceAgreed: os.Getenv("ACME_ACCEPT_TERMS") == "true"}) - if err != nil { - panic(err) - } - myAcmeAccount.Registration = reg + tempClient, err := lego.NewClient(myAcmeConfig) + if err != nil { + log.Printf("[ERROR] Can't create ACME client, continuing with mock certs only: %s", err) } else { - reg, err := tempClient.Registration.RegisterWithExternalAccountBinding(registration.RegisterEABOptions{ - TermsOfServiceAgreed: os.Getenv("ACME_ACCEPT_TERMS") == "true", - Kid: os.Getenv("ACME_EAB_KID"), - HmacEncoded: os.Getenv("ACME_EAB_HMAC"), - }) - if err != nil { - panic(err) + // accept terms & log in to EAB + if os.Getenv("ACME_EAB_KID") == "" || os.Getenv("ACME_EAB_HMAC") == "" { + reg, err := tempClient.Registration.Register(registration.RegisterOptions{TermsOfServiceAgreed: os.Getenv("ACME_ACCEPT_TERMS") == "true"}) + if err != nil { + log.Printf("[ERROR] Can't register ACME account, continuing with mock certs only: %s", err) + } else { + myAcmeAccount.Registration = reg + } + } else { + reg, err := tempClient.Registration.RegisterWithExternalAccountBinding(registration.RegisterEABOptions{ + TermsOfServiceAgreed: os.Getenv("ACME_ACCEPT_TERMS") == "true", + Kid: os.Getenv("ACME_EAB_KID"), + HmacEncoded: os.Getenv("ACME_EAB_HMAC"), + }) + if err != nil { + log.Printf("[ERROR] Can't register ACME account, continuing with mock certs only: %s", err) + } else { + myAcmeAccount.Registration = reg + } } - myAcmeAccount.Registration = reg - } - acmeAccountJson, err := json.Marshal(myAcmeAccount) - if err != nil { - panic(err) - } - err = ioutil.WriteFile("acme-account.json", acmeAccountJson, 0600) - if err != nil { - panic(err) + if myAcmeAccount.Registration != nil { + acmeAccountJson, err := json.Marshal(myAcmeAccount) + if err != nil { + log.Printf("[FAIL] Error during json.Marshal(myAcmeAccount), waiting for manual restart to avoid rate limits: %s", err) + select {} + } + err = ioutil.WriteFile("acme-account.json", acmeAccountJson, 0600) + if err != nil { + log.Printf("[FAIL] Error during ioutil.WriteFile(\"acme-account.json\"), waiting for manual restart to avoid rate limits: %s", err) + select {} + } + } } } else { panic(err) } - acmeClient = newAcmeClient(func(challenge *resolver.SolverManager) error { - err = challenge.SetTLSALPN01Provider(AcmeTLSChallengeProvider{}) + acmeClient, err = lego.NewClient(myAcmeConfig) + if err != nil { + log.Printf("[ERROR] Can't create ACME client, continuing with mock certs only: %s", err) + } else { + err = acmeClient.Challenge.SetTLSALPN01Provider(AcmeTLSChallengeProvider{}) if err != nil { - return err + log.Printf("[ERROR] Can't create TLS-ALPN-01 provider: %s", err) } if os.Getenv("ENABLE_HTTP_SERVER") == "true" { - return challenge.SetHTTP01Provider(AcmeHTTPChallengeProvider{}) + err = acmeClient.Challenge.SetHTTP01Provider(AcmeHTTPChallengeProvider{}) + if err != nil { + log.Printf("[ERROR] Can't create HTTP-01 provider: %s", err) + } } - return err - }) - mainDomainAcmeClient = newAcmeClient(func(challenge *resolver.SolverManager) error { + } + + mainDomainAcmeClient, err = lego.NewClient(myAcmeConfig) + if err != nil { + log.Printf("[ERROR] Can't create ACME client, continuing with mock certs only: %s", err) + } else { if os.Getenv("DNS_PROVIDER") == "" { // using mock server, don't use wildcard certs - return challenge.SetTLSALPN01Provider(AcmeTLSChallengeProvider{}) + err := mainDomainAcmeClient.Challenge.SetTLSALPN01Provider(AcmeTLSChallengeProvider{}) + if err != nil { + log.Printf("[ERROR] Can't create TLS-ALPN-01 provider: %s", err) + } + } else { + provider, err := dns.NewDNSChallengeProviderByName(os.Getenv("DNS_PROVIDER")) + if err != nil { + log.Printf("[ERROR] Can't create DNS Challenge provider: %s", err) + } + err = mainDomainAcmeClient.Challenge.SetDNS01Provider(provider) + if err != nil { + log.Printf("[ERROR] Can't create DNS-01 provider: %s", err) + } } - provider, err := dns.NewDNSChallengeProviderByName(os.Getenv("DNS_PROVIDER")) - if err != nil { - return err - } - return challenge.SetDNS01Provider(provider) - }) + } - resBytes, err := keyDatabase.Get(MainDomainSuffix) - if err != nil { - // key database is not working - panic(err) - } else if resBytes == nil { + if mainCertBytes == nil { _, err = obtainCert(mainDomainAcmeClient, []string{"*" + string(MainDomainSuffix), string(MainDomainSuffix[1:])}, nil, "") if err != nil { - log.Fatalf("Couldn't renew certificate for *%s: %s", MainDomainSuffix, err) + log.Printf("[ERROR] Couldn't renew main domain certificate, continuing with mock certs only: %s", err) } } @@ -441,7 +540,7 @@ func setupCertificates() { for { err := keyDatabase.Sync() if err != nil { - log.Printf("Syncinc key database failed: %s", err) + log.Printf("[ERROR] Syncinc key database failed: %s", err) } time.Sleep(5 * time.Minute) } @@ -467,7 +566,7 @@ func setupCertificates() { if err != nil || !tlsCertificates[0].NotAfter.After(now) { err := keyDatabase.Delete(key) if err != nil { - log.Printf("Deleting expired certificate for %s failed: %s", string(key), err) + log.Printf("[ERROR] Deleting expired certificate for %s failed: %s", string(key), err) } else { expiredCertCount++ } @@ -475,14 +574,14 @@ func setupCertificates() { } key, resBytes, err = keyDatabaseIterator.Next() } - log.Printf("Removed %d expired certificates from the database", expiredCertCount) + log.Printf("[INFO] Removed %d expired certificates from the database", expiredCertCount) // compact the database result, err := keyDatabase.Compact() if err != nil { - log.Printf("Compacting key database failed: %s", err) + log.Printf("[ERROR] Compacting key database failed: %s", err) } else { - log.Printf("Compacted key database (%+v)", result) + log.Printf("[INFO] Compacted key database (%+v)", result) } // update main cert diff --git a/main.go b/main.go index 1d73e33..d6a1d7f 100644 --- a/main.go +++ b/main.go @@ -74,6 +74,25 @@ var IndexPages = []string{ // main sets up and starts the web server. func main() { + if len(os.Args) > 1 && os.Args[1] == "--remove-certificate" { + if len(os.Args) < 2 { + println("--remove-certificate requires at least one domain as an argument") + os.Exit(1) + } + if keyDatabaseErr != nil { + panic(keyDatabaseErr) + } + for _, domain := range os.Args[2:] { + if err := keyDatabase.Delete([]byte(domain)); err != nil { + panic(err) + } + } + if err := keyDatabase.Sync(); err != nil { + panic(err) + } + os.Exit(0) + } + // Make sure MainDomain has a trailing dot, and GiteaRoot has no trailing slash if !bytes.HasPrefix(MainDomainSuffix, []byte{'.'}) { MainDomainSuffix = append([]byte{'.'}, MainDomainSuffix...) From 544b3f73217522ff28c6f4ed99470290f60749d4 Mon Sep 17 00:00:00 2001 From: Moritz Marquardt Date: Wed, 1 Dec 2021 22:49:48 +0100 Subject: [PATCH 2/2] (Ab)use CSR field to store try-again date for renewals (instead of showing a mock cert), must be tested when the first renewals are due --- certificates.go | 92 ++++++++++++++++++++----------------------------- helpers.go | 37 +++++++++++++++++++- 2 files changed, 73 insertions(+), 56 deletions(-) diff --git a/certificates.go b/certificates.go index f85109d..36f0df0 100644 --- a/certificates.go +++ b/certificates.go @@ -24,6 +24,7 @@ import ( "log" "math/big" "os" + "strconv" "strings" "sync" "time" @@ -207,21 +208,9 @@ func (a AcmeHTTPChallengeProvider) CleanUp(domain, token, _ string) error { func retrieveCertFromDB(sni []byte) (tls.Certificate, bool) { // parse certificate from database - resBytes, err := keyDatabase.Get(sni) - if err != nil { - // key database is not working - panic(err) - } - if resBytes == nil { - return tls.Certificate{}, false - } - - resGob := bytes.NewBuffer(resBytes) - resDec := gob.NewDecoder(resGob) res := &certificate.Resource{} - err = resDec.Decode(res) - if err != nil { - panic(err) + if !PogrebGet(keyDatabase, sni, res) { + return tls.Certificate{}, false } tlsCertificate, err := tls.X509KeyPair(res.Certificate, res.PrivateKey) @@ -237,7 +226,15 @@ func retrieveCertFromDB(sni []byte) (tls.Certificate, bool) { // renew certificates 7 days before they expire if !tlsCertificate.Leaf.NotAfter.After(time.Now().Add(-7 * 24 * time.Hour)) { + if res.CSR != nil && len(res.CSR) > 0 { + // CSR stores the time when the renewal shall be tried again + nextTryUnix, err := strconv.ParseInt(string(res.CSR), 10, 64) + if err == nil && time.Now().Before(time.Unix(nextTryUnix, 0)) { + return tlsCertificate, true + } + } go (func() { + res.CSR = nil // acme client doesn't like CSR to be set tlsCertificate, err = obtainCert(acmeClient, []string{string(sni)}, res, "") if err != nil { log.Printf("Couldn't renew certificate for %s: %s", sni, err) @@ -310,18 +307,21 @@ func obtainCert(acmeClient *lego.Client, domains []string, renew *certificate.Re } if err != nil { log.Printf("Couldn't obtain certificate for %v: %s", domains, err) - return mockCert(domains[0], err.Error()), err + if renew != nil && renew.CertURL != "" { + tlsCertificate, err := tls.X509KeyPair(renew.Certificate, renew.PrivateKey) + if err == nil && tlsCertificate.Leaf.NotAfter.After(time.Now()) { + // avoid sending a mock cert instead of a still valid cert, instead abuse CSR field to store time to try again at + renew.CSR = []byte(strconv.FormatInt(time.Now().Add(6 * time.Hour).Unix(), 10)) + PogrebPut(keyDatabase, []byte(name), renew) + return tlsCertificate, nil + } + } else { + return mockCert(domains[0], err.Error()), err + } } log.Printf("Obtained certificate for %v", domains) - var resGob bytes.Buffer - resEnc := gob.NewEncoder(&resGob) - err = resEnc.Encode(res) - if err != nil { - panic(err) - } - err = keyDatabase.Put([]byte(name), resGob.Bytes()) - + PogrebPut(keyDatabase, []byte(name), res) tlsCertificate, err := tls.X509KeyPair(res.Certificate, res.PrivateKey) if err != nil { return tls.Certificate{}, err @@ -382,20 +382,11 @@ func mockCert(domain string, msg string) tls.Certificate { IssuerCertificate: outBytes, Domain: domain, } - var resGob bytes.Buffer - resEnc := gob.NewEncoder(&resGob) - err = resEnc.Encode(res) - if err != nil { - panic(err) - } databaseName := domain if domain == "*" + string(MainDomainSuffix) || domain == string(MainDomainSuffix[1:]) { databaseName = string(MainDomainSuffix) } - err = keyDatabase.Put([]byte(databaseName), resGob.Bytes()) - if err != nil { - panic(err) - } + PogrebPut(keyDatabase, []byte(databaseName), res) tlsCertificate, err := tls.X509KeyPair(res.Certificate, res.PrivateKey) if err != nil { @@ -585,30 +576,21 @@ func setupCertificates() { } // update main cert - resBytes, err = keyDatabase.Get(MainDomainSuffix) - if err != nil { - // key database is not working - panic(err) - } - - resGob := bytes.NewBuffer(resBytes) - resDec := gob.NewDecoder(resGob) res := &certificate.Resource{} - err = resDec.Decode(res) - if err != nil { - panic(err) - } + if !PogrebGet(keyDatabase, MainDomainSuffix, res) { + log.Printf("[ERROR] Couldn't renew certificate for main domain: %s", "expected main domain cert to exist, but it's missing - seems like the database is corrupted") + } else { + tlsCertificates, err := certcrypto.ParsePEMBundle(res.Certificate) - tlsCertificates, err := certcrypto.ParsePEMBundle(res.Certificate) - - // renew main certificate 30 days before it expires - if !tlsCertificates[0].NotAfter.After(time.Now().Add(-30 * 24 * time.Hour)) { - go (func() { - _, err = obtainCert(mainDomainAcmeClient, []string{"*" + string(MainDomainSuffix), string(MainDomainSuffix[1:])}, res, "") - if err != nil { - log.Printf("Couldn't renew certificate for *%s: %s", MainDomainSuffix, err) - } - })() + // renew main certificate 30 days before it expires + if !tlsCertificates[0].NotAfter.After(time.Now().Add(-30 * 24 * time.Hour)) { + go (func() { + _, err = obtainCert(mainDomainAcmeClient, []string{"*" + string(MainDomainSuffix), string(MainDomainSuffix[1:])}, res, "") + if err != nil { + log.Printf("[ERROR] Couldn't renew certificate for main domain: %s", err) + } + })() + } } time.Sleep(12 * time.Hour) diff --git a/helpers.go b/helpers.go index 506573b..46a1492 100644 --- a/helpers.go +++ b/helpers.go @@ -1,6 +1,10 @@ package main -import "bytes" +import ( + "bytes" + "encoding/gob" + "github.com/akrylysov/pogreb" +) // GetHSTSHeader returns a HSTS header with includeSubdomains & preload for MainDomainSuffix and RawDomain, or an empty // string for custom domains. @@ -19,3 +23,34 @@ func TrimHostPort(host []byte) []byte { } return host } + +func PogrebPut(db *pogreb.DB, name []byte, obj interface{}) { + var resGob bytes.Buffer + resEnc := gob.NewEncoder(&resGob) + err := resEnc.Encode(obj) + if err != nil { + panic(err) + } + err = db.Put(name, resGob.Bytes()) + if err != nil { + panic(err) + } +} + +func PogrebGet(db *pogreb.DB, name []byte, obj interface{}) bool { + resBytes, err := db.Get(name) + if err != nil { + panic(err) + } + if resBytes == nil { + return false + } + + resGob := bytes.NewBuffer(resBytes) + resDec := gob.NewDecoder(resGob) + err = resDec.Decode(obj) + if err != nil { + panic(err) + } + return true +}