Google Cloud Storage C++ Client  1.32.1
A C++ Client Library for Google Cloud Storage
client.cc
Go to the documentation of this file.
1 // Copyright 2018 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "google/cloud/storage/client.h"
16 #include "google/cloud/storage/internal/curl_client.h"
17 #include "google/cloud/storage/internal/curl_handle.h"
18 #include "google/cloud/storage/internal/openssl_util.h"
19 #include "google/cloud/storage/oauth2/service_account_credentials.h"
20 #include "google/cloud/internal/algorithm.h"
21 #include "google/cloud/internal/filesystem.h"
22 #include "google/cloud/log.h"
23 #include "absl/memory/memory.h"
24 #include <openssl/md5.h>
25 #include <fstream>
26 #include <thread>
27 
28 namespace google {
29 namespace cloud {
30 namespace storage {
31 inline namespace STORAGE_CLIENT_NS {
32 
33 static_assert(std::is_copy_constructible<storage::Client>::value,
34  "storage::Client must be constructible");
35 static_assert(std::is_copy_assignable<storage::Client>::value,
36  "storage::Client must be assignable");
37 
39  : Client(Client::InternalOnlyNoDecorations{},
40  Client::CreateDefaultInternalClient(
41  internal::DefaultOptionsWithCredentials(std::move(opts)))) {}
42 
43 std::shared_ptr<internal::RawClient> Client::CreateDefaultInternalClient(
44  Options const& opts, std::shared_ptr<internal::RawClient> client) {
45  using ::google::cloud::internal::Contains;
46  auto const& tracing_components = opts.get<TracingComponentsOption>();
47  auto const enable_logging = Contains(tracing_components, "raw-client") ||
48  Contains(tracing_components, "rpc");
49  if (enable_logging) {
50  client = std::make_shared<internal::LoggingClient>(std::move(client));
51  }
52  return std::make_shared<internal::RetryClient>(std::move(client), opts);
53 }
54 
55 std::shared_ptr<internal::RawClient> Client::CreateDefaultInternalClient(
56  Options const& opts) {
57  return CreateDefaultInternalClient(opts, internal::CurlClient::Create(opts));
58 }
59 
61 
62 ObjectReadStream Client::ReadObjectImpl(
63  internal::ReadObjectRangeRequest const& request) {
64  auto source = raw_client_->ReadObject(request);
65  if (!source) {
66  ObjectReadStream error_stream(
67  absl::make_unique<internal::ObjectReadStreambuf>(
68  request, std::move(source).status()));
69  error_stream.setstate(std::ios::badbit | std::ios::eofbit);
70  return error_stream;
71  }
72  auto stream =
73  ObjectReadStream(absl::make_unique<internal::ObjectReadStreambuf>(
74  request, *std::move(source),
75  request.GetOption<ReadFromOffset>().value_or(0)));
76  (void)stream.peek();
77 #if !GOOGLE_CLOUD_CPP_HAVE_EXCEPTIONS
78  // Without exceptions the streambuf cannot report errors, so we have to
79  // manually update the status bits.
80  if (!stream.status().ok()) {
81  stream.setstate(std::ios::badbit | std::ios::eofbit);
82  }
83 #endif // GOOGLE_CLOUD_CPP_HAVE_EXCEPTIONS
84  return stream;
85 }
86 
87 ObjectWriteStream Client::WriteObjectImpl(
88  internal::ResumableUploadRequest const& request) {
89  auto session = raw_client_->CreateResumableSession(request);
90  if (!session) {
91  auto error = absl::make_unique<internal::ResumableUploadSessionError>(
92  std::move(session).status());
93 
94  ObjectWriteStream error_stream(
95  absl::make_unique<internal::ObjectWriteStreambuf>(
96  std::move(error), 0, internal::CreateNullHashFunction(),
97  internal::HashValues{}, internal::CreateNullHashValidator(),
99  error_stream.setstate(std::ios::badbit | std::ios::eofbit);
100  error_stream.Close();
101  return error_stream;
102  }
103  return ObjectWriteStream(absl::make_unique<internal::ObjectWriteStreambuf>(
104  *std::move(session), raw_client_->client_options().upload_buffer_size(),
105  internal::CreateHashFunction(request),
106  internal::HashValues{
107  request.GetOption<Crc32cChecksumValue>().value_or(""),
108  request.GetOption<MD5HashValue>().value_or(""),
109  },
110  internal::CreateHashValidator(request),
111  request.GetOption<AutoFinalize>().value_or(
113 }
114 
115 bool Client::UseSimpleUpload(std::string const& file_name,
116  std::size_t& size) const {
117  auto status = google::cloud::internal::status(file_name);
118  if (!is_regular(status)) {
119  return false;
120  }
121  auto const fs = google::cloud::internal::file_size(file_name);
122  if (fs <= raw_client_->client_options().maximum_simple_upload_size()) {
123  size = static_cast<std::size_t>(fs);
124  return true;
125  }
126  return false;
127 }
128 
129 StatusOr<ObjectMetadata> Client::UploadFileSimple(
130  std::string const& file_name, std::size_t file_size,
131  internal::InsertObjectMediaRequest request) {
132  auto upload_offset = request.GetOption<UploadFromOffset>().value_or(0);
133  if (file_size < upload_offset) {
134  std::ostringstream os;
135  os << __func__ << "(" << request << ", " << file_name
136  << "): UploadFromOffset (" << upload_offset
137  << ") is bigger than the size of file source (" << file_size << ")";
138  return Status(StatusCode::kInvalidArgument, std::move(os).str());
139  }
140  auto upload_size = (std::min)(
141  request.GetOption<UploadLimit>().value_or(file_size - upload_offset),
142  file_size - upload_offset);
143 
144  std::ifstream is(file_name, std::ios::binary);
145  if (!is.is_open()) {
146  std::ostringstream os;
147  os << __func__ << "(" << request << ", " << file_name
148  << "): cannot open upload file source";
149  return Status(StatusCode::kNotFound, std::move(os).str());
150  }
151 
152  std::string payload(static_cast<std::size_t>(upload_size), char{});
153  is.seekg(upload_offset, std::ios::beg);
154  is.read(&payload[0], payload.size());
155  if (static_cast<std::size_t>(is.gcount()) < payload.size()) {
156  std::ostringstream os;
157  os << __func__ << "(" << request << ", " << file_name << "): Actual read ("
158  << is.gcount() << ") is smaller than upload_size (" << payload.size()
159  << ")";
160  return Status(StatusCode::kInternal, std::move(os).str());
161  }
162  is.close();
163  request.set_contents(std::move(payload));
164 
165  return raw_client_->InsertObjectMedia(request);
166 }
167 
168 StatusOr<ObjectMetadata> Client::UploadFileResumable(
169  std::string const& file_name,
170  google::cloud::storage::internal::ResumableUploadRequest request) {
171  auto upload_offset = request.GetOption<UploadFromOffset>().value_or(0);
172  auto status = google::cloud::internal::status(file_name);
173  if (!is_regular(status)) {
174  GCP_LOG(WARNING) << "Trying to upload " << file_name
175  << R"""( which is not a regular file.
176 This is often a problem because:
177  - Some non-regular files are infinite sources of data, and the load will
178  never complete.
179  - Some non-regular files can only be read once, and UploadFile() may need to
180  read the file more than once to compute the checksum and hashes needed to
181  preserve data integrity.
182 
183 Consider using UploadLimit option or Client::WriteObject(). You may also need to disable data
184 integrity checks using the DisableMD5Hash() and DisableCrc32cChecksum() options.
185 )""";
186  } else {
187  std::error_code size_err;
188  auto file_size = google::cloud::internal::file_size(file_name, size_err);
189  if (size_err) {
190  return Status(StatusCode::kNotFound, size_err.message());
191  }
192  if (file_size < upload_offset) {
193  std::ostringstream os;
194  os << __func__ << "(" << request << ", " << file_name
195  << "): UploadFromOffset (" << upload_offset
196  << ") is bigger than the size of file source (" << file_size << ")";
197  return Status(StatusCode::kInvalidArgument, std::move(os).str());
198  }
199 
200  auto upload_size = (std::min)(
201  request.GetOption<UploadLimit>().value_or(file_size - upload_offset),
202  file_size - upload_offset);
203  request.set_option(UploadContentLength(upload_size));
204  }
205  std::ifstream source(file_name, std::ios::binary);
206  if (!source.is_open()) {
207  std::ostringstream os;
208  os << __func__ << "(" << request << ", " << file_name
209  << "): cannot open upload file source";
210  return Status(StatusCode::kNotFound, std::move(os).str());
211  }
212  // We set its offset before passing it to `UploadStreamResumable` so we don't
213  // need to compute `UploadFromOffset` again.
214  source.seekg(upload_offset, std::ios::beg);
215  return UploadStreamResumable(source, request);
216 }
217 
218 // NOLINTNEXTLINE(readability-make-member-function-const)
219 StatusOr<ObjectMetadata> Client::UploadStreamResumable(
220  std::istream& source, internal::ResumableUploadRequest const& request) {
221  StatusOr<std::unique_ptr<internal::ResumableUploadSession>> session_status =
222  raw_client_->CreateResumableSession(request);
223  if (!session_status) {
224  return std::move(session_status).status();
225  }
226 
227  auto session = std::move(*session_status);
228  // How many bytes of the local file are uploaded to the GCS server.
229  auto server_size = session->next_expected_byte();
230  auto upload_limit = request.GetOption<UploadLimit>().value_or(
231  (std::numeric_limits<std::uint64_t>::max)());
232  // If `server_size == upload_limit`, we will upload an empty string and
233  // finalize the upload.
234  if (server_size > upload_limit) {
236  "UploadLimit (" + std::to_string(upload_limit) +
237  ") is not bigger than the uploaded size (" +
238  std::to_string(server_size) + ") on GCS server");
239  }
240  source.seekg(server_size, std::ios::cur);
241 
242  // GCS requires chunks to be a multiple of 256KiB.
243  auto chunk_size = internal::UploadChunkRequest::RoundUpToQuantum(
244  raw_client_->client_options().upload_buffer_size());
245 
246  StatusOr<internal::ResumableUploadResponse> upload_response(
247  internal::ResumableUploadResponse{});
248  // We iterate while `source` is good, the upload size does not reach the
249  // `UploadLimit` and the retry policy has not been exhausted.
250  bool reach_upload_limit = false;
251  internal::ConstBufferSequence buffers(1);
252  std::vector<char> buffer(chunk_size);
253  while (!source.eof() && upload_response &&
254  !upload_response->payload.has_value() && !reach_upload_limit) {
255  // Read a chunk of data from the source file.
256  if (upload_limit - server_size <= chunk_size) {
257  // We don't want the `source_size` to exceed `upload_limit`.
258  chunk_size = static_cast<std::size_t>(upload_limit - server_size);
259  reach_upload_limit = true;
260  }
261  source.read(buffer.data(), buffer.size());
262  auto gcount = static_cast<std::size_t>(source.gcount());
263  bool final_chunk = (gcount < buffer.size()) || reach_upload_limit;
264  auto source_size = session->next_expected_byte() + gcount;
265  auto expected = source_size;
266  buffers[0] = internal::ConstBuffer{buffer.data(), gcount};
267  if (final_chunk) {
268  upload_response = session->UploadFinalChunk(buffers, source_size, {});
269  } else {
270  upload_response = session->UploadChunk(buffers);
271  }
272  if (!upload_response) {
273  return std::move(upload_response).status();
274  }
275  if (session->next_expected_byte() != expected) {
276  // Defensive programming: unless there is a bug, this should be dead code.
277  return Status(
279  "Unexpected last committed byte expected=" +
280  std::to_string(expected) +
281  " got=" + std::to_string(session->next_expected_byte()) +
282  ". This is a bug, please report it at "
283  "https://github.com/googleapis/google-cloud-cpp/issues/new");
284  }
285 
286  // We only update `server_size` when uploading is successful.
287  server_size = expected;
288  }
289 
290  if (!upload_response) {
291  return std::move(upload_response).status();
292  }
293 
294  return *std::move(upload_response->payload);
295 }
296 
297 Status Client::DownloadFileImpl(internal::ReadObjectRangeRequest const& request,
298  std::string const& file_name) {
299  auto report_error = [&request, file_name](char const* func, char const* what,
300  Status const& status) {
301  std::ostringstream msg;
302  msg << func << "(" << request << ", " << file_name << "): " << what
303  << " - status.message=" << status.message();
304  return Status(status.code(), std::move(msg).str());
305  };
306 
307  auto stream = ReadObjectImpl(request);
308  if (!stream.status().ok()) {
309  return report_error(__func__, "cannot open download source object",
310  stream.status());
311  }
312 
313  // Open the destination file, and immediate raise an exception on failure.
314  std::ofstream os(file_name, std::ios::binary);
315  if (!os.is_open()) {
316  return report_error(
317  __func__, "cannot open download destination file",
318  Status(StatusCode::kInvalidArgument, "ofstream::open()"));
319  }
320 
321  std::string buffer;
322  buffer.resize(raw_client_->client_options().download_buffer_size(), '\0');
323  do {
324  stream.read(&buffer[0], buffer.size());
325  os.write(buffer.data(), stream.gcount());
326  } while (os.good() && stream.good());
327  os.close();
328  if (!os.good()) {
329  return report_error(__func__, "cannot close download destination file",
330  Status(StatusCode::kUnknown, "ofstream::close()"));
331  }
332  if (!stream.status().ok()) {
333  return report_error(__func__, "error reading download source object",
334  stream.status());
335  }
336  return Status();
337 }
338 
339 // NOLINTNEXTLINE(readability-make-member-function-const)
340 std::string Client::SigningEmail(SigningAccount const& signing_account) {
341  if (signing_account.has_value()) {
342  return signing_account.value();
343  }
344  return raw_client_->client_options().credentials()->AccountEmail();
345 }
346 
347 StatusOr<Client::SignBlobResponseRaw> Client::SignBlobImpl(
348  SigningAccount const& signing_account, std::string const& string_to_sign) {
349  auto credentials = raw_client_->client_options().credentials();
350 
351  std::string signing_account_email = SigningEmail(signing_account);
352  // First try to sign locally.
353  auto signed_blob = credentials->SignBlob(signing_account, string_to_sign);
354  if (signed_blob) {
355  return SignBlobResponseRaw{credentials->KeyId(), *std::move(signed_blob)};
356  }
357 
358  // If signing locally fails that may be because the credentials do not
359  // support signing, or because the signing account is different than the
360  // credentials account. In either case, try to sign using the API.
361  internal::SignBlobRequest sign_request(
362  signing_account_email, internal::Base64Encode(string_to_sign), {});
363  auto response = raw_client_->SignBlob(sign_request);
364  if (!response) return response.status();
365  auto decoded = internal::Base64Decode(response->signed_blob);
366  if (!decoded) return std::move(decoded).status();
367  return SignBlobResponseRaw{response->key_id, *std::move(decoded)};
368 }
369 
370 StatusOr<std::string> Client::SignUrlV2(
371  internal::V2SignUrlRequest const& request) {
372  SigningAccount const& signing_account = request.signing_account();
373  auto signed_blob = SignBlobImpl(signing_account, request.StringToSign());
374  if (!signed_blob) {
375  return signed_blob.status();
376  }
377 
378  internal::CurlHandle curl;
379  auto encoded = internal::Base64Encode(signed_blob->signed_blob);
380  std::string signature = curl.MakeEscapedString(encoded).get();
381 
382  std::ostringstream os;
383  os << "https://storage.googleapis.com/" << request.bucket_name();
384  if (!request.object_name().empty()) {
385  os << '/' << curl.MakeEscapedString(request.object_name()).get();
386  }
387  os << "?GoogleAccessId=" << SigningEmail(signing_account)
388  << "&Expires=" << request.expiration_time_as_seconds().count()
389  << "&Signature=" << signature;
390 
391  return std::move(os).str();
392 }
393 
394 StatusOr<std::string> Client::SignUrlV4(internal::V4SignUrlRequest request) {
395  auto valid = request.Validate();
396  if (!valid.ok()) {
397  return valid;
398  }
399  request.AddMissingRequiredHeaders();
400  SigningAccount const& signing_account = request.signing_account();
401  auto signing_email = SigningEmail(signing_account);
402 
403  auto string_to_sign = request.StringToSign(signing_email);
404  auto signed_blob = SignBlobImpl(signing_account, string_to_sign);
405  if (!signed_blob) {
406  return signed_blob.status();
407  }
408 
409  std::string signature = internal::HexEncode(signed_blob->signed_blob);
410  internal::CurlHandle curl;
411  std::ostringstream os;
412  os << request.HostnameWithBucket();
413  for (auto& part : request.ObjectNameParts()) {
414  os << '/' << curl.MakeEscapedString(part).get();
415  }
416  os << "?" << request.CanonicalQueryString(signing_email)
417  << "&X-Goog-Signature=" << signature;
418 
419  return std::move(os).str();
420 }
421 
422 StatusOr<PolicyDocumentResult> Client::SignPolicyDocument(
423  internal::PolicyDocumentRequest const& request) {
424  SigningAccount const& signing_account = request.signing_account();
425  auto signing_email = SigningEmail(signing_account);
426 
427  auto string_to_sign = request.StringToSign();
428  auto base64_policy = internal::Base64Encode(string_to_sign);
429  auto signed_blob = SignBlobImpl(signing_account, base64_policy);
430  if (!signed_blob) {
431  return signed_blob.status();
432  }
433 
434  return PolicyDocumentResult{
435  signing_email, request.policy_document().expiration, base64_policy,
436  internal::Base64Encode(signed_blob->signed_blob)};
437 }
438 
439 StatusOr<PolicyDocumentV4Result> Client::SignPolicyDocumentV4(
440  internal::PolicyDocumentV4Request request) {
441  SigningAccount const& signing_account = request.signing_account();
442  auto signing_email = SigningEmail(signing_account);
443  request.SetSigningEmail(signing_email);
444 
445  auto string_to_sign = request.StringToSign();
446  auto escaped = internal::PostPolicyV4Escape(string_to_sign);
447  if (!escaped) return escaped.status();
448  auto base64_policy = internal::Base64Encode(*escaped);
449  auto signed_blob = SignBlobImpl(signing_account, base64_policy);
450  if (!signed_blob) {
451  return signed_blob.status();
452  }
453  std::string signature = internal::HexEncode(signed_blob->signed_blob);
454  auto required_fields = request.RequiredFormFields();
455  required_fields["x-goog-signature"] = signature;
456  required_fields["policy"] = base64_policy;
457  return PolicyDocumentV4Result{request.Url(),
458  request.Credentials(),
459  request.ExpirationDate(),
460  base64_policy,
461  signature,
462  "GOOG4-RSA-SHA256",
463  std::move(required_fields)};
464 }
465 
466 std::string CreateRandomPrefixName(std::string const& prefix) {
467  auto constexpr kPrefixNameSize = 16;
468  auto rng = google::cloud::internal::MakeDefaultPRNG();
469  return prefix + google::cloud::internal::Sample(rng, kPrefixNameSize,
470  "abcdefghijklmnopqrstuvwxyz");
471 }
472 
473 namespace internal {
474 
475 ScopedDeleter::ScopedDeleter(
476  std::function<Status(std::string, std::int64_t)> delete_fun)
477  : enabled_(true), delete_fun_(std::move(delete_fun)) {}
478 
479 ScopedDeleter::~ScopedDeleter() {
480  if (enabled_) {
481  ExecuteDelete();
482  }
483 }
484 
485 void ScopedDeleter::Add(ObjectMetadata const& object) {
486  auto generation = object.generation();
487  Add(std::move(object).name(), generation);
488 }
489 
490 void ScopedDeleter::Add(std::string object_name, std::int64_t generation) {
491  object_list_.emplace_back(std::move(object_name), generation);
492 }
493 
494 Status ScopedDeleter::ExecuteDelete() {
495  std::vector<std::pair<std::string, std::int64_t>> object_list;
496  // make sure the dtor will not do this again
497  object_list.swap(object_list_);
498 
499  // Perform deletion in reverse order. We rely on it in functions which create
500  // a "lock" object - it is created as the first file and should be removed as
501  // last.
502  for (auto object_it = object_list.rbegin(); object_it != object_list.rend();
503  ++object_it) {
504  Status status = delete_fun_(std::move(object_it->first), object_it->second);
505  // Fail on first error. If the service is unavailable, every deletion
506  // would potentially keep retrying until the timeout passes - this would
507  // take way too much time and would be pointless.
508  if (!status.ok()) {
509  return status;
510  }
511  }
512  return Status();
513 }
514 
515 } // namespace internal
516 
517 } // namespace STORAGE_CLIENT_NS
518 } // namespace storage
519 } // namespace cloud
520 } // namespace google